feat(internal): add image_url download feature to OpenAIMixin (#3516)

# What does this PR do? simplify Ollama inference adapter by - - moving image_url download code to OpenAIMixin - being a ModelRegistryHelper instead of having one (mypy blocks check_model_availability method assignment) ## Test Plan - add unit tests for new download feature - add integration tests for openai_chat_completion w/ image_url (close test gap)
2025-12-03 09:53:45 +00:00 · 2025-09-26 17:32:16 -04:00 · 2025-09-26 17:32:16 -04:00 · b48d5cfed7
commit b48d5cfed7
parent 4487b88ffe
5 changed files with 253 additions and 84 deletions
--- a/llama_stack/providers/utils/inference/openai_mixin.py
+++ b/llama_stack/providers/utils/inference/openai_mixin.py
@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import base64
 import uuid
 from abc import ABC, abstractmethod
 from collections.abc import AsyncIterator
@ -26,6 +27,7 @@ from llama_stack.apis.models import ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
+from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content

 logger = get_logger(name=__name__, category="providers::utils")

@ -51,6 +53,10 @@ class OpenAIMixin(ModelRegistryHelper, ABC):
    # This is useful for providers that do not return a unique id in the response.
    overwrite_completion_id: bool = False

+    # Allow subclasses to control whether to download images and convert to base64
+    # for providers that require base64 encoded images instead of URLs.
+    download_images: bool = False
+
    # Embedding model metadata for this provider
    # Can be set by subclasses or instances to provide embedding models
    # Format: {"model_id": {"embedding_dimension": 1536, "context_length": 8192}}
@ -239,6 +245,24 @@ class OpenAIMixin(ModelRegistryHelper, ABC):
        """
        Direct OpenAI chat completion API call.
        """
+        if self.download_images:
+
+            async def _localize_image_url(m: OpenAIMessageParam) -> OpenAIMessageParam:
+                if isinstance(m.content, list):
+                    for c in m.content:
+                        if c.type == "image_url" and c.image_url and c.image_url.url and "http" in c.image_url.url:
+                            localize_result = await localize_image_content(c.image_url.url)
+                            if localize_result is None:
+                                raise ValueError(
+                                    f"Failed to localize image content from {c.image_url.url[:42]}{'...' if len(c.image_url.url) > 42 else ''}"
+                                )
+                            content, format = localize_result
+                            c.image_url.url = f"data:image/{format};base64,{base64.b64encode(content).decode('utf-8')}"
+                # else it's a string and we don't need to modify it
+                return m
+
+            messages = [await _localize_image_url(m) for m in messages]
+
        resp = await self.client.chat.completions.create(
            **await prepare_openai_completion_params(
                model=await self._get_provider_model_id(model),