From 4cf95475e578698b0347078d2c6cf176cd0c15eb Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Wed, 26 Feb 2025 10:52:33 -0800
Subject: [PATCH] fix: make vision and embedding tests pass with openai,
 anthropic and gemini

NOTE - Anthropic embeddings do not work due to LiteLLM not supporting them.
---
 llama_stack/providers/remote/inference/openai/models.py | 4 ++--
 .../providers/utils/inference/litellm_openai_mixin.py   | 1 -
 llama_stack/providers/utils/inference/openai_compat.py  | 8 ++++++--
 llama_stack/templates/dev/run.yaml                      | 2 ++
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/llama_stack/providers/remote/inference/openai/models.py b/llama_stack/providers/remote/inference/openai/models.py
index 657895f27..1737043a4 100644
--- a/llama_stack/providers/remote/inference/openai/models.py
+++ b/llama_stack/providers/remote/inference/openai/models.py
@@ -20,11 +20,11 @@ MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS]
     ProviderModelEntry(
         provider_model_id="openai/text-embedding-3-small",
         model_type=ModelType.embedding,
-        metadata={"embedding_dimension": 1536},
+        metadata={"embedding_dimension": 1536, "context_length": 8192},
     ),
     ProviderModelEntry(
         provider_model_id="openai/text-embedding-3-large",
         model_type=ModelType.embedding,
-        metadata={"embedding_dimension": 3072},
+        metadata={"embedding_dimension": 3072, "context_length": 8192},
     ),
 ]
diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py
index 0f53b5b88..a916e4f99 100644
--- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py
+++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -97,7 +97,6 @@ class LiteLLMOpenAIMixin(
         )
 
         params = await self._get_params(request)
-
         # unfortunately, we need to use synchronous litellm.completion here because litellm
         # caches various httpx.client objects in a non-eventloop aware manner
         response = litellm.completion(**params)
diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index 1e684f4a3..1f1306f0d 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -523,15 +523,19 @@ async def convert_message_to_openai_dict_new(message: Message | Dict) -> OpenAIC
     ) -> Union[str, Iterable[OpenAIChatCompletionContentPartParam]]:
         # Llama Stack and OpenAI spec match for str and text input
         if isinstance(content, str):
-            return content
+            return OpenAIChatCompletionContentPartTextParam(
+                type="text",
+                text=content,
+            )
         elif isinstance(content, TextContentItem):
             return OpenAIChatCompletionContentPartTextParam(
+                type="text",
                 text=content.text,
             )
         elif isinstance(content, ImageContentItem):
             return OpenAIChatCompletionContentPartImageParam(
-                image_url=OpenAIImageURL(url=await convert_image_content_to_url(content)),
                 type="image_url",
+                image_url=OpenAIImageURL(url=await convert_image_content_to_url(content)),
             )
         elif isinstance(content, List):
             return [await _convert_user_message_content(item) for item in content]
diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml
index ab54f1a57..448a3aec7 100644
--- a/llama_stack/templates/dev/run.yaml
+++ b/llama_stack/templates/dev/run.yaml
@@ -119,12 +119,14 @@ models:
   model_type: llm
 - metadata:
     embedding_dimension: 1536
+    context_length: 8192
   model_id: openai/text-embedding-3-small
   provider_id: openai
   provider_model_id: openai/text-embedding-3-small
   model_type: embedding
 - metadata:
     embedding_dimension: 3072
+    context_length: 8192
   model_id: openai/text-embedding-3-large
   provider_id: openai
   provider_model_id: openai/text-embedding-3-large
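
NOTE - The openai_compat.py hunk is what makes vision requests portable: a bare
string in a multi-part user message is now wrapped in an explicit typed text
part, and the "type" discriminator is set before the payload. A minimal sketch
of the message shape the converter now produces; the values are illustrative
and not taken from this patch:

    # Illustrative only: the dict shape emitted for a mixed text + image
    # user message. Stricter providers reached through LiteLLM (e.g.
    # Anthropic, Gemini) can reject bare strings inside a content-part list.
    message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
        ],
    }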
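
NOTE - Both embedding entries (and the matching dev/run.yaml registrations) now
advertise context_length next to embedding_dimension, so callers can size
inputs before issuing a request. A hypothetical sketch of consuming that
metadata; the table and helper below are assumptions, not Llama Stack APIs:

    # Hypothetical consumer of the new metadata; mirrors the values added in
    # models.py but is not part of the patched code itself.
    EMBEDDING_MODELS = {
        "openai/text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192},
        "openai/text-embedding-3-large": {"embedding_dimension": 3072, "context_length": 8192},
    }

    def fits_context(model_id: str, token_count: int) -> bool:
        """True if an input of token_count tokens fits the model's context window."""
        return token_count <= EMBEDDING_MODELS[model_id]["context_length"]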