From 3a9468ce9b06dc066d20622c3e5e916163ba2218 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 17 Jan 2025 18:33:40 -0800 Subject: [PATCH] fix again vllm for non base64 (#818) # What does this PR do? - previous fix introduced regression for non base64 image - add back download, and base64 check ## Test Plan image ## Sources Please link relevant resources if necessary. ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Ran pre-commit to handle lint / formatting issues. - [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section? - [ ] Updated relevant documentation. - [ ] Wrote necessary unit or integration tests. --- llama_stack/providers/remote/inference/vllm/vllm.py | 3 ++- llama_stack/providers/utils/inference/prompt_adapter.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 81c746cce..1dbb4ecfa 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -177,7 +177,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): if isinstance(request, ChatCompletionRequest): if media_present: input_dict["messages"] = [ - await convert_message_to_openai_dict(m) for m in request.messages + await convert_message_to_openai_dict(m, download=True) + for m in request.messages ] else: input_dict["prompt"] = await chat_completion_request_to_prompt( diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py index 7ee19fd7b..701b2ca3b 100644 --- a/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/llama_stack/providers/utils/inference/prompt_adapter.py @@ -188,7 +188,7 @@ async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]: async def convert_image_content_to_url( media: ImageContentItem, download: bool = False, include_format: bool = True ) -> str: - if media.url and not download: + if media.url and (not download or media.url.uri.startswith("data")): return media.url.uri content, format = await localize_image_content(media)