diff --git a/llama_stack/providers/remote/inference/openai/models.py b/llama_stack/providers/remote/inference/openai/models.py index 1737043a4..e029c456c 100644 --- a/llama_stack/providers/remote/inference/openai/models.py +++ b/llama_stack/providers/remote/inference/openai/models.py @@ -4,27 +4,60 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from dataclasses import dataclass + from llama_stack.apis.models.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) LLM_MODEL_IDS = [ + # the models with the "openai/" prefix are litellm-specific model names. + # they should be deprecated in favor of the canonical openai model names. "openai/gpt-4o", "openai/gpt-4o-mini", "openai/chatgpt-4o-latest", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo", + "gpt-3.5-turbo-instruct", + "gpt-4", + "gpt-4-turbo", + "gpt-4o", + "gpt-4o-2024-08-06", + "gpt-4o-mini", + "gpt-4o-audio-preview", + "chatgpt-4o-latest", + "o1", + "o1-mini", + "o3-mini", + "o4-mini", ] +@dataclass +class EmbeddingModelInfo: + """Structured representation of embedding model information.""" + + embedding_dimension: int + context_length: int + + +EMBEDDING_MODEL_IDS: dict[str, EmbeddingModelInfo] = { + "openai/text-embedding-3-small": EmbeddingModelInfo(1536, 8192), + "openai/text-embedding-3-large": EmbeddingModelInfo(3072, 8192), + "text-embedding-3-small": EmbeddingModelInfo(1536, 8192), + "text-embedding-3-large": EmbeddingModelInfo(3072, 8192), +} + + MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + [ ProviderModelEntry( - provider_model_id="openai/text-embedding-3-small", + provider_model_id=model_id, model_type=ModelType.embedding, - metadata={"embedding_dimension": 1536, "context_length": 8192}, - ), - ProviderModelEntry( - provider_model_id="openai/text-embedding-3-large", - model_type=ModelType.embedding, - metadata={"embedding_dimension": 
3072, "context_length": 8192}, - ), + metadata={ + "embedding_dimension": model_info.embedding_dimension, + "context_length": model_info.context_length, + }, + ) + for model_id, model_info in EMBEDDING_MODEL_IDS.items() ] diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index 6b9c02e6c..76218e87e 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -19,6 +19,13 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): provider_data_api_key_field="openai_api_key", ) self.config = config + # we set is_openai_compat so users can use the canonical + # openai model names like "gpt-4" or "gpt-3.5-turbo" + # and the model name will be translated to litellm's + # "openai/gpt-4" or "openai/gpt-3.5-turbo" transparently. + # if we do not set this, users will be exposed to the + # litellm-specific model names, an abstraction leak. + self.is_openai_compat = True async def initialize(self) -> None: await super().initialize() diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index c3c2ab61f..0a5c5e4f4 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -62,6 +62,9 @@ class LiteLLMOpenAIMixin( Inference, NeedsRequestProviderData, ): + # TODO: avoid exposing the litellm-specific model names to the user. + # potential change: add a prefix param that gets added to the model name + # when calling litellm. def __init__( self, model_entries, @@ -92,7 +95,9 @@ class LiteLLMOpenAIMixin( return model def get_litellm_model_name(self, model_id: str) -> str: - return "openai/" + model_id if self.is_openai_compat else model_id + # users may already pass model names with the openai/ prefix, since openai/models.py registered them that way by default. 
+ # the `not model_id.startswith("openai/")` guard leaves already-prefixed names unchanged (no double prefix), for backwards compatibility. + return "openai/" + model_id if self.is_openai_compat and not model_id.startswith("openai/") else model_id async def completion( self, diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml index 236cb17fe..a3b51e7bf 100644 --- a/llama_stack/templates/dev/run.yaml +++ b/llama_stack/templates/dev/run.yaml @@ -149,6 +149,76 @@ models: provider_id: openai provider_model_id: openai/chatgpt-4o-latest model_type: llm +- metadata: {} + model_id: gpt-3.5-turbo-0125 + provider_id: openai + provider_model_id: gpt-3.5-turbo-0125 + model_type: llm +- metadata: {} + model_id: gpt-3.5-turbo + provider_id: openai + provider_model_id: gpt-3.5-turbo + model_type: llm +- metadata: {} + model_id: gpt-3.5-turbo-instruct + provider_id: openai + provider_model_id: gpt-3.5-turbo-instruct + model_type: llm +- metadata: {} + model_id: gpt-4 + provider_id: openai + provider_model_id: gpt-4 + model_type: llm +- metadata: {} + model_id: gpt-4-turbo + provider_id: openai + provider_model_id: gpt-4-turbo + model_type: llm +- metadata: {} + model_id: gpt-4o + provider_id: openai + provider_model_id: gpt-4o + model_type: llm +- metadata: {} + model_id: gpt-4o-2024-08-06 + provider_id: openai + provider_model_id: gpt-4o-2024-08-06 + model_type: llm +- metadata: {} + model_id: gpt-4o-mini + provider_id: openai + provider_model_id: gpt-4o-mini + model_type: llm +- metadata: {} + model_id: gpt-4o-audio-preview + provider_id: openai + provider_model_id: gpt-4o-audio-preview + model_type: llm +- metadata: {} + model_id: chatgpt-4o-latest + provider_id: openai + provider_model_id: chatgpt-4o-latest + model_type: llm +- metadata: {} + model_id: o1 + provider_id: openai + provider_model_id: o1 + model_type: llm +- metadata: {} + model_id: o1-mini + provider_id: openai + provider_model_id: o1-mini + model_type: llm +- metadata: {} + model_id: o3-mini + provider_id: openai + provider_model_id: o3-mini + 
model_type: llm +- metadata: {} + model_id: o4-mini + provider_id: openai + provider_model_id: o4-mini + model_type: llm - metadata: embedding_dimension: 1536 context_length: 8192 @@ -163,6 +233,20 @@ models: provider_id: openai provider_model_id: openai/text-embedding-3-large model_type: embedding +- metadata: + embedding_dimension: 1536 + context_length: 8192 + model_id: text-embedding-3-small + provider_id: openai + provider_model_id: text-embedding-3-small + model_type: embedding +- metadata: + embedding_dimension: 3072 + context_length: 8192 + model_id: text-embedding-3-large + provider_id: openai + provider_model_id: text-embedding-3-large + model_type: embedding - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct provider_id: fireworks diff --git a/llama_stack/templates/verification/run.yaml b/llama_stack/templates/verification/run.yaml index 73fbcfef5..d656e57e2 100644 --- a/llama_stack/templates/verification/run.yaml +++ b/llama_stack/templates/verification/run.yaml @@ -151,6 +151,76 @@ models: provider_id: openai provider_model_id: openai/chatgpt-4o-latest model_type: llm +- metadata: {} + model_id: gpt-3.5-turbo-0125 + provider_id: openai + provider_model_id: gpt-3.5-turbo-0125 + model_type: llm +- metadata: {} + model_id: gpt-3.5-turbo + provider_id: openai + provider_model_id: gpt-3.5-turbo + model_type: llm +- metadata: {} + model_id: gpt-3.5-turbo-instruct + provider_id: openai + provider_model_id: gpt-3.5-turbo-instruct + model_type: llm +- metadata: {} + model_id: gpt-4 + provider_id: openai + provider_model_id: gpt-4 + model_type: llm +- metadata: {} + model_id: gpt-4-turbo + provider_id: openai + provider_model_id: gpt-4-turbo + model_type: llm +- metadata: {} + model_id: gpt-4o + provider_id: openai + provider_model_id: gpt-4o + model_type: llm +- metadata: {} + model_id: gpt-4o-2024-08-06 + provider_id: openai + provider_model_id: gpt-4o-2024-08-06 + model_type: llm +- metadata: {} + model_id: gpt-4o-mini + provider_id: 
openai + provider_model_id: gpt-4o-mini + model_type: llm +- metadata: {} + model_id: gpt-4o-audio-preview + provider_id: openai + provider_model_id: gpt-4o-audio-preview + model_type: llm +- metadata: {} + model_id: chatgpt-4o-latest + provider_id: openai + provider_model_id: chatgpt-4o-latest + model_type: llm +- metadata: {} + model_id: o1 + provider_id: openai + provider_model_id: o1 + model_type: llm +- metadata: {} + model_id: o1-mini + provider_id: openai + provider_model_id: o1-mini + model_type: llm +- metadata: {} + model_id: o3-mini + provider_id: openai + provider_model_id: o3-mini + model_type: llm +- metadata: {} + model_id: o4-mini + provider_id: openai + provider_model_id: o4-mini + model_type: llm - metadata: embedding_dimension: 1536 context_length: 8192 @@ -165,6 +235,20 @@ models: provider_id: openai provider_model_id: openai/text-embedding-3-large model_type: embedding +- metadata: + embedding_dimension: 1536 + context_length: 8192 + model_id: text-embedding-3-small + provider_id: openai + provider_model_id: text-embedding-3-small + model_type: embedding +- metadata: + embedding_dimension: 3072 + context_length: 8192 + model_id: text-embedding-3-large + provider_id: openai + provider_model_id: text-embedding-3-large + model_type: embedding - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct provider_id: fireworks-openai-compat