Merge branch 'main' into remove-batch-inference

2025-10-07 04:45:44 +00:00 · 2025-09-26 11:25:56 -04:00 · 2025-09-26 11:25:56 -04:00 · 32b87bf88a
commit 32b87bf88a
parent f6d1867bf5 c88c4ff2c6
748 changed files with 127607 additions and 50032 deletions
--- a/llama_stack/providers/remote/inference/openai/__init__.py
+++ b/llama_stack/providers/remote/inference/openai/__init__.py
@@ -4,15 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from pydantic import BaseModel
-
 from .config import OpenAIConfig


-class OpenAIProviderDataValidator(BaseModel):
-    openai_api_key: str | None = None
-
-
 async def get_adapter_impl(config: OpenAIConfig, _deps):
    from .openai import OpenAIInferenceAdapter

--- a/llama_stack/providers/remote/inference/openai/models.py
+++ b/llama_stack/providers/remote/inference/openai/models.py
@@ -1,60 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from dataclasses import dataclass
-
-from llama_stack.apis.models import ModelType
-from llama_stack.providers.utils.inference.model_registry import (
-    ProviderModelEntry,
-)
-
-LLM_MODEL_IDS = [
-    "gpt-3.5-turbo-0125",
-    "gpt-3.5-turbo",
-    "gpt-3.5-turbo-instruct",
-    "gpt-4",
-    "gpt-4-turbo",
-    "gpt-4o",
-    "gpt-4o-2024-08-06",
-    "gpt-4o-mini",
-    "gpt-4o-audio-preview",
-    "chatgpt-4o-latest",
-    "o1",
-    "o1-mini",
-    "o3-mini",
-    "o4-mini",
-]
-
-
-@dataclass
-class EmbeddingModelInfo:
-    """Structured representation of embedding model information."""
-
-    embedding_dimension: int
-    context_length: int
-
-
-EMBEDDING_MODEL_IDS: dict[str, EmbeddingModelInfo] = {
-    "text-embedding-3-small": EmbeddingModelInfo(1536, 8192),
-    "text-embedding-3-large": EmbeddingModelInfo(3072, 8192),
-}
-SAFETY_MODELS_ENTRIES = []
-
-MODEL_ENTRIES = (
-    [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS]
-    + [
-        ProviderModelEntry(
-            provider_model_id=model_id,
-            model_type=ModelType.embedding,
-            metadata={
-                "embedding_dimension": model_info.embedding_dimension,
-                "context_length": model_info.context_length,
-            },
-        )
-        for model_id, model_info in EMBEDDING_MODEL_IDS.items()
-    ]
-    + SAFETY_MODELS_ENTRIES
-)
--- a/llama_stack/providers/remote/inference/openai/openai.py
+++ b/llama_stack/providers/remote/inference/openai/openai.py
@@ -9,7 +9,6 @@ from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOp
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import OpenAIConfig
-from .models import MODEL_ENTRIES

 logger = get_logger(name=__name__, category="inference::openai")

@@ -38,10 +37,14 @@ class OpenAIInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
    - ModelRegistryHelper.check_model_availability() (inherited by LiteLLMOpenAIMixin) just returns False and shows a warning
    """

+    embedding_model_metadata = {
+        "text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192},
+        "text-embedding-3-large": {"embedding_dimension": 3072, "context_length": 8192},
+    }
+
    def __init__(self, config: OpenAIConfig) -> None:
        LiteLLMOpenAIMixin.__init__(
            self,
-            MODEL_ENTRIES,
            litellm_provider_name="openai",
            api_key_from_config=config.api_key,
            provider_data_api_key_field="openai_api_key",