From cef35bb3db51fef068a07d9d56a8e328fe365ed9 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Fri, 31 Jan 2025 10:25:29 -0500 Subject: [PATCH] detect image.data mime type with pillow --- .../remote/inference/nvidia/openai_utils.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/inference/nvidia/openai_utils.py b/llama_stack/providers/remote/inference/nvidia/openai_utils.py index 2e3d6affa..655d70282 100644 --- a/llama_stack/providers/remote/inference/nvidia/openai_utils.py +++ b/llama_stack/providers/remote/inference/nvidia/openai_utils.py @@ -4,8 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import base64 import json import warnings +from io import BytesIO from typing import Any, AsyncGenerator, Dict, Generator, Iterable, List, Optional, Union from llama_models.datatypes import ( @@ -44,6 +46,8 @@ from openai.types.chat.chat_completion_message_tool_call_param import ( from openai.types.completion import Completion as OpenAICompletion from openai.types.completion_choice import Logprobs as OpenAICompletionLogprobs +from PIL import Image + from llama_stack.apis.common.content_types import ( ImageContentItem, InterleavedContent, @@ -186,9 +190,18 @@ def _convert_message(message: Message | Dict) -> OpenAIChatCompletionMessage: type="image_url", ) elif content.image.data: + mime_type = Image.MIME[ + Image.open( + BytesIO( + base64.b64decode( + content.image.data + ) # TODO(mf): do this more efficiently, decode less + ) + ).format + ] return OpenAIChatCompletionContentPartImageParam( image_url=OpenAIImageURL( - url=f"data:image/png;base64,{content.image.data}" # TODO(mf): how do we know the type? + url=f"data:{mime_type};base64,{content.image.data}" ), type="image_url", )