feat: split API and provider specs into separate llama-stack-api pkg (#3895)

# What does this PR do?

Extract API definitions and provider specifications into a standalone
llama-stack-api package that can be published to PyPI independently of
the main llama-stack server.


See: https://github.com/llamastack/llama-stack/pull/2978 and
https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942

## Motivation

External providers currently import from llama-stack, which overrides
the installed version and causes dependency conflicts. This separation
allows external providers to:

- Install only the type definitions they need without server
dependencies
- Avoid version conflicts with the installed llama-stack package
- Be versioned and released independently

This enables us to re-enable external provider module tests that were
previously blocked by these import conflicts.
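As a sketch of what this enables (the adapter class below is hypothetical; Inference and OpenAIChatCompletionRequestWithExtraBody are real symbols that the diffs in this PR re-export from llama_stack_api), an external provider can be typed against the API package alone:

```python
# Hypothetical third-party provider module that depends only on
# llama-stack-api, not on the llama-stack server package.
from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody


class MyInferenceAdapter(Inference):
    """Illustrative adapter typed against the standalone API package."""
```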

## Changes

- Created llama-stack-api package with minimal dependencies (pydantic,
jsonschema)
- Moved the APIs, provider datatypes, strong_typing, and schema_utils modules
- Updated all imports from llama_stack.* to llama_stack_api.*
- Configured local editable install for development workflow
- Updated linting and type-checking configuration for both packages
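For example, a representative before/after for the import migration (assembled from the hunks below, not an exhaustive mapping):

```python
# Before: symbols spread across several server-package modules
# from llama_stack.apis.inference import OpenAIChatCompletion
# from llama_stack.apis.models import Model, ModelType
# from llama_stack.schema_utils import json_schema_type

# After: a single flat namespace from the standalone package
from llama_stack_api import Model, ModelType, OpenAIChatCompletion, json_schema_type
```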

## Next Steps

- Publish llama-stack-api to PyPI
- Update external provider dependencies
- Re-enable external provider module tests


Precursor PRs to this one:

- #4093 
- #3954 
- #4064 

These PRs moved key pieces _out_ of the API package, limiting the scope of
the changes here.


Relates to #3237.

## Test Plan

The package builds successfully and can be imported independently. All
pre-commit hooks pass, with the expected exclusions maintained.
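A minimal check along these lines (the exact verification commands are not recorded here, so treat this as a sketch to run in a clean environment with only llama-stack-api installed):

```python
# Import the standalone package and a couple of the symbols the diffs
# below re-export from it; no llama-stack server install is required.
import llama_stack_api
from llama_stack_api import Inference, json_schema_type

print("llama_stack_api imports cleanly:", llama_stack_api.__name__)
```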

---------

Signed-off-by: Charlie Doern <cdoern@redhat.com>
358 changed files with 2337 additions and 1424 deletions. The hunks below show the import migration applied across the remote inference providers:

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class AnthropicProviderDataValidator(BaseModel):
```

```diff
@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, HttpUrl, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class AzureProviderDataValidator(BaseModel):
```

```diff
@@ -6,9 +6,7 @@
 from collections.abc import AsyncIterator, Iterable
-from openai import AuthenticationError
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -17,6 +15,8 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+from openai import AuthenticationError
 from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -6,10 +6,11 @@
 from urllib.parse import urljoin
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import CerebrasImplConfig
```

```diff
@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 DEFAULT_BASE_URL = "https://api.cerebras.ai"
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class DatabricksProviderDataValidator(BaseModel):
```

```diff
@@ -7,8 +7,8 @@
 from collections.abc import Iterable
 from databricks.sdk import WorkspaceClient
+from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
-from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class GeminiProviderDataValidator(BaseModel):
```

```diff
@@ -6,12 +6,13 @@
 from typing import Any
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import GeminiConfig
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class GroqProviderDataValidator(BaseModel):
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class LlamaProviderDataValidator(BaseModel):
```

```diff
@@ -4,12 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.apis.inference.inference import (
+from llama_stack_api import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.apis.inference import Inference
+from llama_stack_api import Inference
 from .config import NVIDIAConfig
```

```diff
@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class NVIDIAProviderDataValidator(BaseModel):
```

```diff
@@ -8,16 +8,15 @@
 from collections.abc import Iterable
 import aiohttp
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    Model,
+    ModelType,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
     RerankData,
     RerankResponse,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-)
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.apis.inference import InferenceProvider
+from llama_stack_api import InferenceProvider
 from .config import OCIConfig
```

```diff
@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class OCIProviderDataValidator(BaseModel):
```

```diff
@@ -10,15 +10,15 @@ from typing import Any
 import httpx
 import oci
+from llama_stack_api import (
+    ModelType,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 from oci.generative_ai.generative_ai_client import GenerativeAiClient
 from oci.generative_ai.models import ModelCollection
 from openai._base_client import DefaultAsyncHttpxClient
-from llama_stack.apis.inference.inference import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
-from llama_stack.apis.models import ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
 from llama_stack.providers.remote.inference.oci.config import OCIConfig
```

```diff
@@ -7,15 +7,15 @@
 import asyncio
-from ollama import AsyncClient as AsyncOllamaClient
-from llama_stack.apis.common.errors import UnsupportedModelError
-from llama_stack.apis.models import Model
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     HealthResponse,
     HealthStatus,
+    Model,
+    UnsupportedModelError,
 )
+from ollama import AsyncClient as AsyncOllamaClient
+from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class OpenAIProviderDataValidator(BaseModel):
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type
```

```diff
@@ -6,10 +6,9 @@
 from collections.abc import AsyncIterator
-from openai import AsyncOpenAI
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     Inference,
+    Model,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -18,7 +17,8 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.apis.models import Model
+from openai import AsyncOpenAI
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from .config import PassthroughImplConfig
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class RunpodProviderDataValidator(BaseModel):
```

```diff
@@ -6,11 +6,12 @@
 from collections.abc import AsyncIterator
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
 )
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import RunpodImplConfig
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class SambaNovaProviderDataValidator(BaseModel):
```

```diff
@@ -5,10 +5,10 @@
 # the root directory of this source tree.
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type
```

```diff
@@ -8,12 +8,12 @@
 from collections.abc import Iterable
 from huggingface_hub import AsyncInferenceClient, HfApi
-from pydantic import SecretStr
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+from pydantic import SecretStr
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type
```

```diff
@@ -8,15 +8,15 @@
 from collections.abc import Iterable
 from typing import Any, cast
+from llama_stack_api import (
+    Model,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+)
 from together import AsyncTogether  # type: ignore[import-untyped]
 from together.constants import BASE_URL  # type: ignore[import-untyped]
-from llama_stack.apis.inference import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
-from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
-from llama_stack.apis.models import Model
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class VertexAIProviderDataValidator(BaseModel):
```

```diff
@@ -6,10 +6,10 @@
 from pathlib import Path
+from llama_stack_api import json_schema_type
 from pydantic import Field, SecretStr, field_validator
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type
```

```diff
@@ -7,19 +7,17 @@ from collections.abc import AsyncIterator
 from urllib.parse import urljoin
 import httpx
-from pydantic import ConfigDict
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    HealthResponse,
+    HealthStatus,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
     ToolChoice,
 )
+from pydantic import ConfigDict
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
-    HealthResponse,
-    HealthStatus,
-)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import VLLMInferenceAdapterConfig
```

```diff
@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class WatsonXProviderDataValidator(BaseModel):
```

```diff
@@ -9,8 +9,9 @@ from typing import Any
 import litellm
 import requests
-from llama_stack.apis.inference.inference import (
+from llama_stack_api import (
+    Model,
+    ModelType,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -20,8 +21,7 @@ from llama_stack.apis.inference.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.apis.models import Model
-from llama_stack.apis.models.models import ModelType
 from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
@@ -238,7 +238,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         )
         # Convert response to OpenAI format
-        from llama_stack.apis.inference import OpenAIEmbeddingUsage
+        from llama_stack_api import OpenAIEmbeddingUsage
+
         from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response
         data = b64_encode_openai_embeddings_response(response.data, params.encoding_format)
```