Merge branch 'main' into chroma

2025-12-03 18:00:36 +00:00 · 2025-09-11 20:51:31 +09:00 · 2025-09-11 20:51:31 +09:00 · 6bdcfc2627
commit 6bdcfc2627
parent 11c71c958e f31bcc11bc
27 changed files with 6409 additions and 15 deletions
--- a/llama_stack/distributions/ci-tests/build.yaml
+++ b/llama_stack/distributions/ci-tests/build.yaml
@ -17,6 +17,7 @@ distribution_spec:
    - provider_type: remote::vertexai
    - provider_type: remote::groq
    - provider_type: remote::sambanova
+    - provider_type: remote::azure
    - provider_type: inline::sentence-transformers
    vector_io:
    - provider_type: inline::faiss
--- a/llama_stack/distributions/ci-tests/run.yaml
+++ b/llama_stack/distributions/ci-tests/run.yaml
@ -81,6 +81,13 @@ providers:
    config:
      url: https://api.sambanova.ai/v1
      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
  vector_io:
--- a/llama_stack/distributions/starter-gpu/build.yaml
+++ b/llama_stack/distributions/starter-gpu/build.yaml
@ -18,6 +18,7 @@ distribution_spec:
    - provider_type: remote::vertexai
    - provider_type: remote::groq
    - provider_type: remote::sambanova
+    - provider_type: remote::azure
    - provider_type: inline::sentence-transformers
    vector_io:
    - provider_type: inline::faiss
--- a/llama_stack/distributions/starter-gpu/run.yaml
+++ b/llama_stack/distributions/starter-gpu/run.yaml
@ -81,6 +81,13 @@ providers:
    config:
      url: https://api.sambanova.ai/v1
      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
  vector_io:
--- a/llama_stack/distributions/starter/build.yaml
+++ b/llama_stack/distributions/starter/build.yaml
@ -18,6 +18,7 @@ distribution_spec:
    - provider_type: remote::vertexai
    - provider_type: remote::groq
    - provider_type: remote::sambanova
+    - provider_type: remote::azure
    - provider_type: inline::sentence-transformers
    vector_io:
    - provider_type: inline::faiss
--- a/llama_stack/distributions/starter/run.yaml
+++ b/llama_stack/distributions/starter/run.yaml
@ -81,6 +81,13 @@ providers:
    config:
      url: https://api.sambanova.ai/v1
      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
  vector_io:
--- a/llama_stack/distributions/starter/starter.py
+++ b/llama_stack/distributions/starter/starter.py
@ -59,6 +59,7 @@ ENABLED_INFERENCE_PROVIDERS = [
    "cerebras",
    "nvidia",
    "bedrock",
+    "azure",
 ]

 INFERENCE_PROVIDER_IDS = {
@ -68,6 +69,7 @@ INFERENCE_PROVIDER_IDS = {
    "cerebras": "${env.CEREBRAS_API_KEY:+cerebras}",
    "nvidia": "${env.NVIDIA_API_KEY:+nvidia}",
    "vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}",
+    "azure": "${env.AZURE_API_KEY:+azure}",
 }


@ -277,5 +279,21 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
                "http://localhost:11434",
                "Ollama URL",
            ),
+            "AZURE_API_KEY": (
+                "",
+                "Azure API Key",
+            ),
+            "AZURE_API_BASE": (
+                "",
+                "Azure API Base",
+            ),
+            "AZURE_API_VERSION": (
+                "",
+                "Azure API Version",
+            ),
+            "AZURE_API_TYPE": (
+                "azure",
+                "Azure API Type",
+            ),
        },
    )
--- a/llama_stack/providers/registry/inference.py
+++ b/llama_stack/providers/registry/inference.py
@ -295,4 +295,19 @@ Available Models:
                description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.",
            ),
        ),
+        remote_provider_spec(
+            api=Api.inference,
+            adapter=AdapterSpec(
+                adapter_type="azure",
+                pip_packages=["litellm"],
+                module="llama_stack.providers.remote.inference.azure",
+                config_class="llama_stack.providers.remote.inference.azure.AzureConfig",
+                provider_data_validator="llama_stack.providers.remote.inference.azure.config.AzureProviderDataValidator",
+                description="""
+Azure OpenAI inference provider for accessing GPT models and other Azure services.
+Provider documentation
+https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
+""",
+            ),
+        ),
    ]
--- a/llama_stack/providers/remote/inference/azure/init.py
+++ b/llama_stack/providers/remote/inference/azure/init.py
@ -0,0 +1,15 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .config import AzureConfig
+
+
+async def get_adapter_impl(config: AzureConfig, _deps):
+    from .azure import AzureInferenceAdapter
+
+    impl = AzureInferenceAdapter(config)
+    await impl.initialize()
+    return impl
--- a/llama_stack/providers/remote/inference/azure/azure.py
+++ b/llama_stack/providers/remote/inference/azure/azure.py
@ -0,0 +1,64 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+from urllib.parse import urljoin
+
+from llama_stack.apis.inference import ChatCompletionRequest
+from llama_stack.providers.utils.inference.litellm_openai_mixin import (
+    LiteLLMOpenAIMixin,
+)
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+
+from .config import AzureConfig
+from .models import MODEL_ENTRIES
+
+
+class AzureInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
+    def __init__(self, config: AzureConfig) -> None:
+        LiteLLMOpenAIMixin.__init__(
+            self,
+            MODEL_ENTRIES,
+            litellm_provider_name="azure",
+            api_key_from_config=config.api_key.get_secret_value(),
+            provider_data_api_key_field="azure_api_key",
+            openai_compat_api_base=str(config.api_base),
+        )
+        self.config = config
+
+    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
+    get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+    def get_base_url(self) -> str:
+        """
+        Get the Azure API base URL.
+
+        Returns the Azure API base URL from the configuration.
+        """
+        return urljoin(str(self.config.api_base), "/openai/v1")
+
+    async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
+        # Get base parameters from parent
+        params = await super()._get_params(request)
+
+        # Add Azure specific parameters
+        provider_data = self.get_request_provider_data()
+        if provider_data:
+            if getattr(provider_data, "azure_api_key", None):
+                params["api_key"] = provider_data.azure_api_key
+            if getattr(provider_data, "azure_api_base", None):
+                params["api_base"] = provider_data.azure_api_base
+            if getattr(provider_data, "azure_api_version", None):
+                params["api_version"] = provider_data.azure_api_version
+            if getattr(provider_data, "azure_api_type", None):
+                params["api_type"] = provider_data.azure_api_type
+        else:
+            params["api_key"] = self.config.api_key.get_secret_value()
+            params["api_base"] = str(self.config.api_base)
+            params["api_version"] = self.config.api_version
+            params["api_type"] = self.config.api_type
+
+        return params
--- a/llama_stack/providers/remote/inference/azure/config.py
+++ b/llama_stack/providers/remote/inference/azure/config.py
@ -0,0 +1,63 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+from typing import Any
+
+from pydantic import BaseModel, Field, HttpUrl, SecretStr
+
+from llama_stack.schema_utils import json_schema_type
+
+
+class AzureProviderDataValidator(BaseModel):
+    azure_api_key: SecretStr = Field(
+        description="Azure API key for Azure",
+    )
+    azure_api_base: HttpUrl = Field(
+        description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)",
+    )
+    azure_api_version: str | None = Field(
+        default=None,
+        description="Azure API version for Azure (e.g., 2024-06-01)",
+    )
+    azure_api_type: str | None = Field(
+        default="azure",
+        description="Azure API type for Azure (e.g., azure)",
+    )
+
+
+@json_schema_type
+class AzureConfig(BaseModel):
+    api_key: SecretStr = Field(
+        description="Azure API key for Azure",
+    )
+    api_base: HttpUrl = Field(
+        description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)",
+    )
+    api_version: str | None = Field(
+        default_factory=lambda: os.getenv("AZURE_API_VERSION"),
+        description="Azure API version for Azure (e.g., 2024-12-01-preview)",
+    )
+    api_type: str | None = Field(
+        default_factory=lambda: os.getenv("AZURE_API_TYPE", "azure"),
+        description="Azure API type for Azure (e.g., azure)",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        api_key: str = "${env.AZURE_API_KEY:=}",
+        api_base: str = "${env.AZURE_API_BASE:=}",
+        api_version: str = "${env.AZURE_API_VERSION:=}",
+        api_type: str = "${env.AZURE_API_TYPE:=}",
+        **kwargs,
+    ) -> dict[str, Any]:
+        return {
+            "api_key": api_key,
+            "api_base": api_base,
+            "api_version": api_version,
+            "api_type": api_type,
+        }
--- a/llama_stack/providers/remote/inference/azure/models.py
+++ b/llama_stack/providers/remote/inference/azure/models.py
@ -0,0 +1,28 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.utils.inference.model_registry import (
+    ProviderModelEntry,
+)
+
+# https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions
+LLM_MODEL_IDS = [
+    "gpt-5",
+    "gpt-5-mini",
+    "gpt-5-nano",
+    "gpt-5-chat",
+    "o1",
+    "o1-mini",
+    "o3-mini",
+    "o4-mini",
+    "gpt-4.1",
+    "gpt-4.1-mini",
+    "gpt-4.1-nano",
+]
+
+SAFETY_MODELS_ENTRIES = list[ProviderModelEntry]()
+
+MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + SAFETY_MODELS_ENTRIES
--- a/llama_stack/testing/inference_recorder.py
+++ b/llama_stack/testing/inference_recorder.py
@ -105,8 +105,12 @@ def _deserialize_response(data: dict[str, Any]) -> Any:

            return cls.model_validate(data["__data__"])
        except (ImportError, AttributeError, TypeError, ValueError) as e:
-            logger.warning(f"Failed to deserialize object of type {data['__type__']}: {e}")
-            return data["__data__"]
+            logger.warning(f"Failed to deserialize object of type {data['__type__']} with model_validate: {e}")
+            try:
+                return cls.model_construct(**data["__data__"])
+            except Exception as e:
+                logger.warning(f"Failed to deserialize object of type {data['__type__']} with model_construct: {e}")
+                return data["__data__"]

    return data