Rework InterleavedContentMedia datatype so URL downloading is in llama-stack

2025-12-17 20:02:37 +00:00 · 2024-12-15 13:23:30 -08:00 · 2024-12-15 13:23:30 -08:00 · a9a041a1de
commit a9a041a1de
parent c2f7905fa4
10 changed files with 368 additions and 146 deletions
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@ -22,6 +22,7 @@ from llama_stack.providers.utils.inference.model_registry import (
    ModelRegistryHelper,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
+    convert_message_to_openai_dict,
    get_sampling_options,
    process_chat_completion_response,
    process_chat_completion_stream_response,
@ -30,7 +31,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
    chat_completion_request_to_prompt,
    completion_request_to_prompt,
    content_has_media,
-    convert_message_to_dict,
+    interleaved_content_as_str,
    request_has_media,
 )

@ -71,7 +72,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
    async def completion(
        self,
        model_id: str,
-        content: InterleavedTextMedia,
+        content: InterleavedContent,
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        response_format: Optional[ResponseFormat] = None,
        stream: Optional[bool] = False,
@ -163,7 +164,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
            if media_present:
                # vllm does not seem to work well with image urls, so we download the images
                input_dict["messages"] = [
-                    await convert_message_to_dict(m, download=True)
+                    await convert_message_to_openai_dict(m, download=True)
                    for m in request.messages
                ]
            else:
@ -202,7 +203,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
    async def embeddings(
        self,
        model_id: str,
-        contents: List[InterleavedTextMedia],
+        contents: List[InterleavedContent],
    ) -> EmbeddingsResponse:
        model = await self.model_store.get_model(model_id)

@ -215,7 +216,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
        ), "VLLM does not support media for embeddings"
        response = self.client.embeddings.create(
            model=model.provider_resource_id,
-            input=[interleaved_text_media_as_str(content) for content in contents],
+            input=[interleaved_content_as_str(content) for content in contents],
            **kwargs,
        )