feat: split API and provider specs into separate llama-stack-api pkg (#3895)

# What does this PR do?

Extract API definitions and provider specifications into a standalone llama-stack-api package that can be published to PyPI independently of the main llama-stack server.

See https://github.com/llamastack/llama-stack/pull/2978 and https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942

## Motivation

External providers currently import from llama-stack, which overrides the installed version and causes dependency conflicts. This separation allows external providers to:

- Install only the type definitions they need without server dependencies
- Avoid version conflicts with the installed llama-stack package
- Be versioned and released independently

This enables us to re-enable external provider module tests that were previously blocked by these import conflicts.

## Changes

- Created llama-stack-api package with minimal dependencies (pydantic, jsonschema)
- Moved APIs, providers datatypes, strong_typing, and schema_utils
- Updated all imports from llama_stack.* to llama_stack_api.*
- Configured local editable install for development workflow
- Updated linting and type-checking configuration for both packages

## Next Steps

- Publish llama-stack-api to PyPI
- Update external provider dependencies
- Re-enable external provider module tests

Precursor PRs to this one:

- #4093
- #3954
- #4064

These PRs moved key pieces _out_ of the Api pkg, limiting the scope of change here.

Relates to #3237

## Test Plan

Package builds successfully and can be imported independently. All pre-commit hooks pass with expected exclusions maintained.

---------

Signed-off-by: Charlie Doern <cdoern@redhat.com>
2025-12-03 18:00:36 +00:00 · 2025-11-13 14:51:17 -05:00 · 2025-11-13 14:51:17 -05:00 · 840ad75fe9
commit 840ad75fe9
parent ceb716b9a0
358 changed files with 2337 additions and 1424 deletions
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@ -30,13 +30,16 @@ jobs:
        activate-environment: true
        version: 0.7.6

-    - name: Build Llama Stack package
-      run: |
-        uv build
+    - name: Build Llama Stack API package
+      working-directory: src/llama-stack-api
+      run: uv build

-    - name: Install Llama Stack package
+    - name: Build Llama Stack package
+      run: uv build
+
+    - name: Install Llama Stack package (with api stubs from local build)
      run: |
-        uv pip install dist/*.whl
+        uv pip install --find-links src/llama-stack-api/dist dist/*.whl

    - name: Verify Llama Stack package
      run: |
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -42,7 +42,7 @@ repos:
    hooks:
    -   id: ruff
        args: [ --fix ]
-        exclude: ^src/llama_stack/strong_typing/.*$
+        exclude: ^(src/llama_stack/strong_typing/.*|src/llama-stack-api/llama_stack_api/strong_typing/.*)$
    -   id: ruff-format

 -   repo: https://github.com/adamchainz/blacken-docs
--- a/docs/docs/concepts/apis/external.mdx
+++ b/docs/docs/concepts/apis/external.mdx
@ -58,7 +58,7 @@ External APIs must expose a `available_providers()` function in their module tha

 ```python
 # llama_stack_api_weather/api.py
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec


 def available_providers() -> list[ProviderSpec]:
@ -79,7 +79,7 @@ A Protocol class like so:
 # llama_stack_api_weather/api.py
 from typing import Protocol

-from llama_stack.schema_utils import webmethod
+from llama_stack_api import webmethod


 class WeatherAPI(Protocol):
@ -151,13 +151,12 @@ __all__ = ["WeatherAPI", "available_providers"]
 # llama-stack-api-weather/src/llama_stack_api_weather/weather.py
 from typing import Protocol

-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
    Api,
    ProviderSpec,
    RemoteProviderSpec,
+    webmethod,
 )
-from llama_stack.schema_utils import webmethod
-

 def available_providers() -> list[ProviderSpec]:
    return [
--- a/docs/docs/distributions/building_distro.mdx
+++ b/docs/docs/distributions/building_distro.mdx
@ -65,7 +65,7 @@ external_providers_dir: /workspace/providers.d
 Inside `providers.d/custom_ollama/provider.py`, define `get_provider_spec()` so the CLI can discover dependencies:

 ```python
-from llama_stack.providers.datatypes import ProviderSpec
+from llama_stack_api import ProviderSpec


 def get_provider_spec() -> ProviderSpec:
--- a/docs/docs/providers/external/external-providers-guide.mdx
+++ b/docs/docs/providers/external/external-providers-guide.mdx
@ -80,7 +80,7 @@ container_image: custom-vector-store:latest  # optional
 All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class. The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:

 ```python
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
    ProviderSpec,
    Api,
    RemoteProviderSpec,
--- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
+++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
@ -153,7 +153,7 @@ description: |
  Example using RAGQueryConfig with different search modes:

  ```python
-  from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+  from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker

  # Vector search
  config = RAGQueryConfig(mode="vector", max_chunks=5)
@ -358,7 +358,7 @@ Two ranker types are supported:
 Example using RAGQueryConfig with different search modes:

 ```python
-from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker

 # Vector search
 config = RAGQueryConfig(mode="vector", max_chunks=5)
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@ -16,7 +16,7 @@ import sys
 import fire
 import ruamel.yaml as yaml

-from llama_stack.apis.version import LLAMA_STACK_API_V1 # noqa: E402
+from llama_stack_api import LLAMA_STACK_API_V1 # noqa: E402
 from llama_stack.core.stack import LlamaStack  # noqa: E402

 from .pyopenapi.options import Options  # noqa: E402
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@ -16,27 +16,27 @@ from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union

 from fastapi import UploadFile

-from llama_stack.apis.datatypes import Error
-from llama_stack.strong_typing.core import JsonType
-from llama_stack.strong_typing.docstring import Docstring, parse_type
-from llama_stack.strong_typing.inspection import (
+from llama_stack_api import (
+    Docstring,
+    Error,
+    JsonSchemaGenerator,
+    JsonType,
+    Schema,
+    SchemaOptions,
+    get_schema_identifier,
    is_generic_list,
    is_type_optional,
    is_type_union,
    is_unwrapped_body_param,
+    json_dump_string,
+    object_to_json,
+    parse_type,
+    python_type_to_name,
+    register_schema,
    unwrap_generic_list,
    unwrap_optional_type,
    unwrap_union_types,
 )
-from llama_stack.strong_typing.name import python_type_to_name
-from llama_stack.strong_typing.schema import (
-    get_schema_identifier,
-    JsonSchemaGenerator,
-    register_schema,
-    Schema,
-    SchemaOptions,
-)
-from llama_stack.strong_typing.serialization import json_dump_string, object_to_json
 from pydantic import BaseModel

 from .operations import (
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@ -11,19 +11,21 @@ import typing
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union

-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
-
 from termcolor import colored

-from llama_stack.strong_typing.inspection import get_signature
-
 from typing import get_origin, get_args

 from fastapi import UploadFile
 from fastapi.params import File, Form
 from typing import Annotated

-from llama_stack.schema_utils import ExtraBodyField
+from llama_stack_api import (
+    ExtraBodyField,
+    LLAMA_STACK_API_V1,
+    LLAMA_STACK_API_V1ALPHA,
+    LLAMA_STACK_API_V1BETA,
+    get_signature,
+)


 def split_prefix(
--- a/docs/openapi_generator/pyopenapi/specification.py
+++ b/docs/openapi_generator/pyopenapi/specification.py
@ -9,7 +9,7 @@ import enum
 from dataclasses import dataclass
 from typing import Any, ClassVar, Dict, List, Optional, Union

-from llama_stack.strong_typing.schema import JsonType, Schema, StrictJsonType
+from llama_stack_api import JsonType, Schema, StrictJsonType

 URL = str

--- a/docs/openapi_generator/pyopenapi/utility.py
+++ b/docs/openapi_generator/pyopenapi/utility.py
@ -11,8 +11,7 @@ from pathlib import Path
 from typing import Any, List, Optional, TextIO, Union, get_type_hints, get_origin, get_args

 from pydantic import BaseModel
-from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
-from llama_stack.strong_typing.inspection import is_unwrapped_body_param
+from llama_stack_api import StrictJsonType, is_unwrapped_body_param, object_to_json
 from llama_stack.core.resolver import api_protocol_map

 from .generator import Generator
@ -165,12 +164,12 @@ def _validate_api_delete_method_returns_none(method) -> str | None:
        return "has no return type annotation"

    return_type = hints['return']
-    
+
    # Allow OpenAI endpoints to return response objects since they follow OpenAI specification
    method_name = getattr(method, '__name__', '')
    if method_name.__contains__('openai_'):
        return None
-    
+
    if return_type is not None and return_type is not type(None):
        return "does not return None where None is mandatory"

--- a/pyproject.toml
+++ b/pyproject.toml
@ -31,6 +31,7 @@ dependencies = [
    "httpx",
    "jinja2>=3.1.6",
    "jsonschema",
+    "llama-stack-api",  # API and provider specifications (local dev via tool.uv.sources)
    "openai>=2.5.0",
    "prompt-toolkit",
    "python-dotenv",
@ -180,7 +181,7 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p

 [tool.setuptools.packages.find]
 where = ["src"]
-include = ["llama_stack", "llama_stack.*"]
+include = ["llama_stack", "llama_stack.*", "llama-stack-api", "llama-stack-api.*"]

 [[tool.uv.index]]
 name = "pytorch-cpu"
@ -190,6 +191,7 @@ explicit = true
 [tool.uv.sources]
 torch = [{ index = "pytorch-cpu" }]
 torchvision = [{ index = "pytorch-cpu" }]
+llama-stack-api = [{ path = "src/llama-stack-api", editable = true }]

 [tool.ruff]
 line-length = 120
@ -256,8 +258,8 @@ unfixable = [
 ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API

 [tool.mypy]
-mypy_path = ["src"]
-packages = ["llama_stack"]
+mypy_path = ["src", "src/llama-stack-api"]
+packages = ["llama_stack", "llama_stack_api"]
 plugins = ['pydantic.mypy']
 disable_error_code = []
 warn_return_any = true
@ -279,15 +281,18 @@ exclude = [
    "^src/llama_stack/core/store/registry\\.py$",
    "^src/llama_stack/core/utils/exec\\.py$",
    "^src/llama_stack/core/utils/prompt_for_config\\.py$",
+    # Some entries below live in src/llama-stack-api after the split but remain excluded
    "^src/llama_stack/models/llama/llama3/interface\\.py$",
    "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
    "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
-    "^src/llama_stack/providers/inline/datasetio/localfs/",
-    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
-    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
    "^src/llama_stack/models/llama/llama3/generation\\.py$",
    "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
    "^src/llama_stack/models/llama/llama4/",
+    "^src/llama-stack-api/llama_stack_api/core/telemetry/telemetry\\.py$",
+    "^src/llama_stack/providers/inline/agents/meta_reference/",
+    "^src/llama_stack/providers/inline/datasetio/localfs/",
+    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
+    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
    "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
    "^src/llama_stack/providers/inline/post_training/common/validator\\.py$",
    "^src/llama_stack/providers/inline/safety/code_scanner/",
@ -337,7 +342,9 @@ exclude = [
    "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
    "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
    "^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
-    "^src/llama_stack/strong_typing/auxiliary\\.py$",
+    "^src/llama-stack-api/llama_stack_api/core/telemetry/trace_protocol\\.py$",
+    "^src/llama-stack-api/llama_stack_api/core/telemetry/tracing\\.py$",
+    "^src/llama-stack-api/llama_stack_api/strong_typing/auxiliary\\.py$",
    "^src/llama_stack/distributions/template\\.py$",
 ]

--- a/scripts/generate_prompt_format.py
+++ b/scripts/generate_prompt_format.py
@ -14,8 +14,8 @@ import os
 from pathlib import Path

 import fire
+from llama_stack_api import ModelNotFoundError

-from llama_stack.apis.common.errors import ModelNotFoundError
 from llama_stack.models.llama.llama3.generation import Llama3
 from llama_stack.models.llama.llama4.generation import Llama4
 from llama_stack.models.llama.sku_list import resolve_model
--- a/scripts/provider_codegen.py
+++ b/scripts/provider_codegen.py
@ -22,7 +22,7 @@ def get_api_docstring(api_name: str) -> str | None:
    """Extract docstring from the API protocol class."""
    try:
        # Import the API module dynamically
-        api_module = __import__(f"llama_stack.apis.{api_name}", fromlist=[api_name.title()])
+        api_module = __import__(f"llama_stack_api.{api_name}", fromlist=[api_name.title()])

        # Get the main protocol class (usually capitalized API name)
        protocol_class_name = api_name.title()
@ -83,8 +83,9 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]:
                # this string replace is ridiculous
                field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "")
                field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "")
-                field_type = field_type.replace("llama_stack.apis.inference.inference.", "")
+                field_type = field_type.replace("llama_stack_api.inference.", "")
                field_type = field_type.replace("llama_stack.providers.", "")
+                field_type = field_type.replace("llama_stack_api.datatypes.", "")

                default_value = field.default
                if field.default_factory is not None:
--- a/src/llama-stack-api/README.md
+++ b/src/llama-stack-api/README.md
@ -0,0 +1,103 @@
+# llama-stack-api
+
+API and Provider specifications for Llama Stack - a lightweight package with protocol definitions and provider specs.
+
+## Overview
+
+`llama-stack-api` is a minimal dependency package that contains:
+
+- **API Protocol Definitions**: Type-safe protocol definitions for all Llama Stack APIs (inference, agents, safety, etc.)
+- **Provider Specifications**: Provider spec definitions for building custom providers
+- **Data Types**: Shared data types and models used across the Llama Stack ecosystem
+- **Type Utilities**: Strong typing utilities and schema validation
+
+## What This Package Does NOT Include
+
+- Server implementation (see `llama-stack` package)
+- Provider implementations (see `llama-stack` package)
+- CLI tools (see `llama-stack` package)
+- Runtime orchestration (see `llama-stack` package)
+
+## Use Cases
+
+This package is designed for:
+
+1. **Third-party Provider Developers**: Build custom providers without depending on the full Llama Stack server
+2. **Client Library Authors**: Use type definitions without server dependencies
+3. **Documentation Generation**: Generate API docs from protocol definitions
+4. **Type Checking**: Validate implementations against the official specs
+
+## Installation
+
+```bash
+pip install llama-stack-api
+```
+
+Or with uv:
+
+```bash
+uv pip install llama-stack-api
+```
+
+## Dependencies
+
+Minimal dependencies:
+- `pydantic>=2.11.9` - For data validation and serialization
+- `jsonschema` - For JSON schema utilities
+
+## Versioning
+
+This package follows semantic versioning independently from the main `llama-stack` package:
+
+- **Patch versions** (0.1.x): Documentation, internal improvements
+- **Minor versions** (0.x.0): New APIs, backward-compatible changes
+- **Major versions** (x.0.0): Breaking changes to existing APIs
+
+Current version: **0.4.0** (also available at runtime as `llama_stack_api.__version__`)
+
+## Usage Example
+
+```python
+# Import from the package root: `from llama_stack_api import <symbol>`.
+# Sub-module imports (e.g. `llama_stack_api.inference`) are not supported.
+from llama_stack_api import Api, Inference, InlineProviderSpec, OpenAIChatCompletionRequestWithExtraBody
+
+
+# Use protocol definitions for type checking
+class MyInferenceProvider(Inference):
+    async def openai_chat_completion(self, params: OpenAIChatCompletionRequestWithExtraBody):
+        # Your implementation
+        pass
+
+
+# Define provider specifications
+my_provider_spec = InlineProviderSpec(
+    api=Api.inference,
+    provider_type="inline::my-provider",
+    pip_packages=["my-dependencies"],
+    module="my_package.providers.inference",
+    config_class="my_package.providers.inference.MyConfig",
+)
+```
+
+## Relationship to llama-stack
+
+The main `llama-stack` package depends on `llama-stack-api` and provides:
+- Full server implementation
+- Built-in provider implementations
+- CLI tools for running and managing stacks
+- Runtime provider resolution and orchestration
+
+## Contributing
+
+See the main [Llama Stack repository](https://github.com/llamastack/llama-stack) for contribution guidelines.
+
+## License
+
+MIT License - see LICENSE file for details.
+
+## Links
+
+- [Main Llama Stack Repository](https://github.com/llamastack/llama-stack)
+- [Documentation](https://llamastack.ai/)
+- [Client Library](https://pypi.org/project/llama-stack-client/)
--- a/src/llama-stack-api/llama_stack_api/__init__.py
+++ b/src/llama-stack-api/llama_stack_api/__init__.py
@ -0,0 +1,871 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+Llama Stack API Specifications
+
+This package contains the API definitions, data types, and protocol specifications
+for Llama Stack. It is designed to be a lightweight dependency for external providers
+and clients that need to interact with Llama Stack APIs without requiring the full
+server implementation.
+
+All imports from this package MUST use the form:
+    from llama_stack_api import <symbol>
+
+Sub-module imports (e.g., from llama_stack_api.agents import Agents) are NOT supported
+and considered a code smell. All exported symbols are explicitly listed in __all__.
+"""
+
+__version__ = "0.4.0"
+
+# Import submodules for those who need them
+from . import common, strong_typing  # noqa: F401
+
+# Import all public API symbols
+from .agents import Agents, ResponseGuardrail, ResponseGuardrailSpec
+from .batches import Batches, BatchObject, ListBatchesResponse
+from .benchmarks import (
+    Benchmark,
+    BenchmarkInput,
+    Benchmarks,
+    CommonBenchmarkFields,
+    ListBenchmarksResponse,
+)
+
+# Import commonly used types from common submodule
+from .common.content_types import (
+    URL,
+    ImageContentItem,
+    InterleavedContent,
+    InterleavedContentItem,
+    TextContentItem,
+    _URLOrData,
+)
+from .common.errors import (
+    ConflictError,
+    DatasetNotFoundError,
+    InvalidConversationIdError,
+    ModelNotFoundError,
+    ModelTypeError,
+    ResourceNotFoundError,
+    TokenValidationError,
+    ToolGroupNotFoundError,
+    UnsupportedModelError,
+    VectorStoreNotFoundError,
+)
+from .common.job_types import Job, JobStatus
+from .common.responses import Order, PaginatedResponse
+from .common.training_types import Checkpoint, PostTrainingMetric
+from .common.type_system import (
+    ChatCompletionInputType,
+    CompletionInputType,
+    NumberType,
+    ParamType,
+    StringType,
+)
+from .conversations import (
+    Conversation,
+    ConversationDeletedResource,
+    ConversationItem,
+    ConversationItemCreateRequest,
+    ConversationItemDeletedResource,
+    ConversationItemInclude,
+    ConversationItemList,
+    ConversationMessage,
+    Conversations,
+    Metadata,
+)
+from .datasetio import DatasetIO, DatasetStore
+from .datasets import (
+    CommonDatasetFields,
+    Dataset,
+    DatasetInput,
+    DatasetPurpose,
+    Datasets,
+    DatasetType,
+    DataSource,
+    ListDatasetsResponse,
+    RowsDataSource,
+    URIDataSource,
+)
+from .datatypes import (
+    Api,
+    BenchmarksProtocolPrivate,
+    DatasetsProtocolPrivate,
+    DynamicApiMeta,
+    Error,
+    ExternalApiSpec,
+    HealthResponse,
+    HealthStatus,
+    InlineProviderSpec,
+    ModelsProtocolPrivate,
+    ProviderSpec,
+    RemoteProviderConfig,
+    RemoteProviderSpec,
+    RoutingTable,
+    ScoringFunctionsProtocolPrivate,
+    ShieldsProtocolPrivate,
+    ToolGroupsProtocolPrivate,
+    VectorStoresProtocolPrivate,
+)
+from .eval import BenchmarkConfig, Eval, EvalCandidate, EvaluateResponse, ModelCandidate
+from .files import (
+    ExpiresAfter,
+    Files,
+    ListOpenAIFileResponse,
+    OpenAIFileDeleteResponse,
+    OpenAIFileObject,
+    OpenAIFilePurpose,
+)
+from .inference import (
+    Bf16QuantizationConfig,
+    ChatCompletionResponseEventType,
+    CompletionRequest,
+    EmbeddingsResponse,
+    EmbeddingTaskType,
+    Fp8QuantizationConfig,
+    GrammarResponseFormat,
+    GreedySamplingStrategy,
+    Inference,
+    InferenceProvider,
+    Int4QuantizationConfig,
+    JsonSchemaResponseFormat,
+    ListOpenAIChatCompletionResponse,
+    LogProbConfig,
+    ModelStore,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartParam,
+    OpenAIChatCompletionContentPartTextParam,
+    OpenAIChatCompletionMessageContent,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIChatCompletionTextOnlyMessageContent,
+    OpenAIChatCompletionToolCall,
+    OpenAIChatCompletionToolCallFunction,
+    OpenAIChatCompletionUsage,
+    OpenAIChatCompletionUsageCompletionTokensDetails,
+    OpenAIChatCompletionUsagePromptTokensDetails,
+    OpenAIChoice,
+    OpenAIChoiceDelta,
+    OpenAIChoiceLogprobs,
+    OpenAIChunkChoice,
+    OpenAICompletion,
+    OpenAICompletionChoice,
+    OpenAICompletionLogprobs,
+    OpenAICompletionRequestWithExtraBody,
+    OpenAICompletionWithInputMessages,
+    OpenAIDeveloperMessageParam,
+    OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+    OpenAIFile,
+    OpenAIFileFile,
+    OpenAIImageURL,
+    OpenAIJSONSchema,
+    OpenAIMessageParam,
+    OpenAIResponseFormatJSONObject,
+    OpenAIResponseFormatJSONSchema,
+    OpenAIResponseFormatParam,
+    OpenAIResponseFormatText,
+    OpenAISystemMessageParam,
+    OpenAITokenLogProb,
+    OpenAIToolMessageParam,
+    OpenAITopLogProb,
+    OpenAIUserMessageParam,
+    QuantizationConfig,
+    QuantizationType,
+    RerankData,
+    RerankResponse,
+    ResponseFormat,
+    ResponseFormatType,
+    SamplingParams,
+    SamplingStrategy,
+    SystemMessage,
+    SystemMessageBehavior,
+    TextTruncation,
+    TokenLogProbs,
+    ToolChoice,
+    ToolResponseMessage,
+    TopKSamplingStrategy,
+    TopPSamplingStrategy,
+    UserMessage,
+)
+from .inspect import (
+    ApiFilter,
+    HealthInfo,
+    Inspect,
+    ListRoutesResponse,
+    RouteInfo,
+    VersionInfo,
+)
+from .models import (
+    CommonModelFields,
+    ListModelsResponse,
+    Model,
+    ModelInput,
+    Models,
+    ModelType,
+    OpenAIListModelsResponse,
+    OpenAIModel,
+)
+from .openai_responses import (
+    AllowedToolsFilter,
+    ApprovalFilter,
+    ListOpenAIResponseInputItem,
+    ListOpenAIResponseObject,
+    MCPListToolsTool,
+    OpenAIDeleteResponseObject,
+    OpenAIResponseAnnotationCitation,
+    OpenAIResponseAnnotationContainerFileCitation,
+    OpenAIResponseAnnotationFileCitation,
+    OpenAIResponseAnnotationFilePath,
+    OpenAIResponseAnnotations,
+    OpenAIResponseContentPart,
+    OpenAIResponseContentPartOutputText,
+    OpenAIResponseContentPartReasoningSummary,
+    OpenAIResponseContentPartReasoningText,
+    OpenAIResponseContentPartRefusal,
+    OpenAIResponseError,
+    OpenAIResponseInput,
+    OpenAIResponseInputFunctionToolCallOutput,
+    OpenAIResponseInputMessageContent,
+    OpenAIResponseInputMessageContentFile,
+    OpenAIResponseInputMessageContentImage,
+    OpenAIResponseInputMessageContentText,
+    OpenAIResponseInputTool,
+    OpenAIResponseInputToolFileSearch,
+    OpenAIResponseInputToolFunction,
+    OpenAIResponseInputToolMCP,
+    OpenAIResponseInputToolWebSearch,
+    OpenAIResponseMCPApprovalRequest,
+    OpenAIResponseMCPApprovalResponse,
+    OpenAIResponseMessage,
+    OpenAIResponseObject,
+    OpenAIResponseObjectStream,
+    OpenAIResponseObjectStreamResponseCompleted,
+    OpenAIResponseObjectStreamResponseContentPartAdded,
+    OpenAIResponseObjectStreamResponseContentPartDone,
+    OpenAIResponseObjectStreamResponseCreated,
+    OpenAIResponseObjectStreamResponseFailed,
+    OpenAIResponseObjectStreamResponseFileSearchCallCompleted,
+    OpenAIResponseObjectStreamResponseFileSearchCallInProgress,
+    OpenAIResponseObjectStreamResponseFileSearchCallSearching,
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta,
+    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone,
+    OpenAIResponseObjectStreamResponseIncomplete,
+    OpenAIResponseObjectStreamResponseInProgress,
+    OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta,
+    OpenAIResponseObjectStreamResponseMcpCallArgumentsDone,
+    OpenAIResponseObjectStreamResponseMcpCallCompleted,
+    OpenAIResponseObjectStreamResponseMcpCallFailed,
+    OpenAIResponseObjectStreamResponseMcpCallInProgress,
+    OpenAIResponseObjectStreamResponseMcpListToolsCompleted,
+    OpenAIResponseObjectStreamResponseMcpListToolsFailed,
+    OpenAIResponseObjectStreamResponseMcpListToolsInProgress,
+    OpenAIResponseObjectStreamResponseOutputItemAdded,
+    OpenAIResponseObjectStreamResponseOutputItemDone,
+    OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded,
+    OpenAIResponseObjectStreamResponseOutputTextDelta,
+    OpenAIResponseObjectStreamResponseOutputTextDone,
+    OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded,
+    OpenAIResponseObjectStreamResponseReasoningSummaryPartDone,
+    OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta,
+    OpenAIResponseObjectStreamResponseReasoningSummaryTextDone,
+    OpenAIResponseObjectStreamResponseReasoningTextDelta,
+    OpenAIResponseObjectStreamResponseReasoningTextDone,
+    OpenAIResponseObjectStreamResponseRefusalDelta,
+    OpenAIResponseObjectStreamResponseRefusalDone,
+    OpenAIResponseObjectStreamResponseWebSearchCallCompleted,
+    OpenAIResponseObjectStreamResponseWebSearchCallInProgress,
+    OpenAIResponseObjectStreamResponseWebSearchCallSearching,
+    OpenAIResponseObjectWithInput,
+    OpenAIResponseOutput,
+    OpenAIResponseOutputMessageContent,
+    OpenAIResponseOutputMessageContentOutputText,
+    OpenAIResponseOutputMessageFileSearchToolCall,
+    OpenAIResponseOutputMessageFileSearchToolCallResults,
+    OpenAIResponseOutputMessageFunctionToolCall,
+    OpenAIResponseOutputMessageMCPCall,
+    OpenAIResponseOutputMessageMCPListTools,
+    OpenAIResponseOutputMessageWebSearchToolCall,
+    OpenAIResponsePrompt,
+    OpenAIResponseText,
+    OpenAIResponseTextFormat,
+    OpenAIResponseTool,
+    OpenAIResponseToolMCP,
+    OpenAIResponseUsage,
+    OpenAIResponseUsageInputTokensDetails,
+    OpenAIResponseUsageOutputTokensDetails,
+    WebSearchToolTypes,
+)
+from .post_training import (
+    AlgorithmConfig,
+    DataConfig,
+    DatasetFormat,
+    DPOAlignmentConfig,
+    DPOLossType,
+    EfficiencyConfig,
+    ListPostTrainingJobsResponse,
+    LoraFinetuningConfig,
+    OptimizerConfig,
+    OptimizerType,
+    PostTraining,
+    PostTrainingJob,
+    PostTrainingJobArtifactsResponse,
+    PostTrainingJobLogStream,
+    PostTrainingJobStatusResponse,
+    PostTrainingRLHFRequest,
+    QATFinetuningConfig,
+    RLHFAlgorithm,
+    TrainingConfig,
+)
+from .prompts import ListPromptsResponse, Prompt, Prompts
+from .providers import ListProvidersResponse, ProviderInfo, Providers
+from .rag_tool import (
+    DefaultRAGQueryGeneratorConfig,
+    LLMRAGQueryGeneratorConfig,
+    RAGDocument,
+    RAGQueryConfig,
+    RAGQueryGenerator,
+    RAGQueryGeneratorConfig,
+    RAGQueryResult,
+    RAGSearchMode,
+    Ranker,
+    RRFRanker,
+    WeightedRanker,
+)
+from .resource import Resource, ResourceType
+from .safety import (
+    ModerationObject,
+    ModerationObjectResults,
+    RunShieldResponse,
+    Safety,
+    SafetyViolation,
+    ShieldStore,
+    ViolationLevel,
+)
+from .schema_utils import (
+    CallableT,
+    ExtraBodyField,
+    WebMethod,
+    json_schema_type,
+    register_schema,
+    webmethod,
+)
+from .scoring import (
+    ScoreBatchResponse,
+    ScoreResponse,
+    Scoring,
+    ScoringFunctionStore,
+    ScoringResult,
+    ScoringResultRow,
+)
+from .scoring_functions import (
+    AggregationFunctionType,
+    BasicScoringFnParams,
+    CommonScoringFnFields,
+    ListScoringFunctionsResponse,
+    LLMAsJudgeScoringFnParams,
+    RegexParserScoringFnParams,
+    ScoringFn,
+    ScoringFnInput,
+    ScoringFnParams,
+    ScoringFnParamsType,
+    ScoringFunctions,
+)
+from .shields import (
+    CommonShieldFields,
+    ListShieldsResponse,
+    Shield,
+    ShieldInput,
+    Shields,
+)
+
+# Import from strong_typing
+from .strong_typing.core import JsonType
+from .strong_typing.docstring import Docstring, parse_type
+from .strong_typing.inspection import (
+    get_signature,
+    is_generic_list,
+    is_type_optional,
+    is_type_union,
+    is_unwrapped_body_param,
+    unwrap_generic_list,
+    unwrap_optional_type,
+    unwrap_union_types,
+)
+from .strong_typing.name import python_type_to_name
+from .strong_typing.schema import (
+    JsonSchemaGenerator,
+    Schema,
+    SchemaOptions,
+    StrictJsonType,
+    get_schema_identifier,
+)
+from .strong_typing.serialization import json_dump_string, object_to_json
+from .tools import (
+    ListToolDefsResponse,
+    ListToolGroupsResponse,
+    SpecialToolGroup,
+    ToolDef,
+    ToolGroup,
+    ToolGroupInput,
+    ToolGroups,
+    ToolInvocationResult,
+    ToolRuntime,
+    ToolStore,
+)
+from .vector_io import (
+    Chunk,
+    ChunkMetadata,
+    OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
+    OpenAICreateVectorStoreRequestWithExtraBody,
+    QueryChunksResponse,
+    SearchRankingOptions,
+    VectorIO,
+    VectorStoreChunkingStrategy,
+    VectorStoreChunkingStrategyAuto,
+    VectorStoreChunkingStrategyStatic,
+    VectorStoreChunkingStrategyStaticConfig,
+    VectorStoreContent,
+    VectorStoreCreateRequest,
+    VectorStoreDeleteResponse,
+    VectorStoreFileBatchObject,
+    VectorStoreFileContentResponse,
+    VectorStoreFileCounts,
+    VectorStoreFileDeleteResponse,
+    VectorStoreFileLastError,
+    VectorStoreFileObject,
+    VectorStoreFilesListInBatchResponse,
+    VectorStoreFileStatus,
+    VectorStoreListFilesResponse,
+    VectorStoreListResponse,
+    VectorStoreModifyRequest,
+    VectorStoreObject,
+    VectorStoreSearchRequest,
+    VectorStoreSearchResponse,
+    VectorStoreSearchResponsePage,
+    VectorStoreTable,
+)
+from .vector_stores import VectorStore, VectorStoreInput
+from .version import (
+    LLAMA_STACK_API_V1,
+    LLAMA_STACK_API_V1ALPHA,
+    LLAMA_STACK_API_V1BETA,
+)
+
+# Explicit public surface of this module: these are the names bound by
+# `from <this package> import *` and the names tools treat as exported.
+__all__ = [
+    # Submodules
+    "common",
+    "strong_typing",
+    # Version constants
+    "LLAMA_STACK_API_V1",
+    "LLAMA_STACK_API_V1ALPHA",
+    "LLAMA_STACK_API_V1BETA",
+    # API Symbols
+    # (kept in roughly alphabetical, case-insensitive order)
+    "Agents",
+    "AggregationFunctionType",
+    "AlgorithmConfig",
+    "AllowedToolsFilter",
+    "Api",
+    "ApiFilter",
+    "ApprovalFilter",
+    "BasicScoringFnParams",
+    "Batches",
+    "BatchObject",
+    "Benchmark",
+    "BenchmarkConfig",
+    "BenchmarkInput",
+    "Benchmarks",
+    "BenchmarksProtocolPrivate",
+    "Bf16QuantizationConfig",
+    "CallableT",
+    "ChatCompletionInputType",
+    "ChatCompletionResponseEventType",
+    "Checkpoint",
+    "Chunk",
+    "ChunkMetadata",
+    "CommonBenchmarkFields",
+    "ConflictError",
+    "CommonDatasetFields",
+    "CommonModelFields",
+    "CommonScoringFnFields",
+    "CommonShieldFields",
+    "CompletionInputType",
+    "CompletionRequest",
+    "Conversation",
+    "ConversationDeletedResource",
+    "ConversationItem",
+    "ConversationItemCreateRequest",
+    "ConversationItemDeletedResource",
+    "ConversationItemInclude",
+    "ConversationItemList",
+    "ConversationMessage",
+    "Conversations",
+    "DPOAlignmentConfig",
+    "DPOLossType",
+    "DataConfig",
+    "DataSource",
+    "Dataset",
+    "DatasetFormat",
+    "DatasetIO",
+    "DatasetInput",
+    "DatasetPurpose",
+    "DatasetNotFoundError",
+    "DatasetStore",
+    "DatasetType",
+    "Datasets",
+    "DatasetsProtocolPrivate",
+    "DefaultRAGQueryGeneratorConfig",
+    "Docstring",
+    "DynamicApiMeta",
+    "EfficiencyConfig",
+    "EmbeddingTaskType",
+    "EmbeddingsResponse",
+    "Error",
+    "Eval",
+    "EvalCandidate",
+    "EvaluateResponse",
+    "ExpiresAfter",
+    "ExternalApiSpec",
+    "ExtraBodyField",
+    "Files",
+    "Fp8QuantizationConfig",
+    "get_schema_identifier",
+    "get_signature",
+    "GrammarResponseFormat",
+    "GreedySamplingStrategy",
+    "HealthInfo",
+    "HealthResponse",
+    "HealthStatus",
+    "ImageContentItem",
+    "Inference",
+    "InferenceProvider",
+    "InlineProviderSpec",
+    "Inspect",
+    "Int4QuantizationConfig",
+    "InterleavedContent",
+    "InterleavedContentItem",
+    "InvalidConversationIdError",
+    "is_generic_list",
+    "is_type_optional",
+    "is_type_union",
+    "is_unwrapped_body_param",
+    "Job",
+    "JobStatus",
+    "json_dump_string",
+    "json_schema_type",
+    "JsonSchemaGenerator",
+    "JsonSchemaResponseFormat",
+    "JsonType",
+    "LLMAsJudgeScoringFnParams",
+    "LLMRAGQueryGeneratorConfig",
+    "ListBatchesResponse",
+    "ListBenchmarksResponse",
+    "ListDatasetsResponse",
+    "ListModelsResponse",
+    "ListOpenAIChatCompletionResponse",
+    "ListOpenAIFileResponse",
+    "ListOpenAIResponseInputItem",
+    "ListOpenAIResponseObject",
+    "ListPostTrainingJobsResponse",
+    "ListPromptsResponse",
+    "ListProvidersResponse",
+    "ListRoutesResponse",
+    "ListScoringFunctionsResponse",
+    "ListShieldsResponse",
+    "ListToolDefsResponse",
+    "ListToolGroupsResponse",
+    "LogProbConfig",
+    "LoraFinetuningConfig",
+    "MCPListToolsTool",
+    "Metadata",
+    "Model",
+    "ModelCandidate",
+    "ModelInput",
+    "ModelNotFoundError",
+    "ModelStore",
+    "ModelType",
+    "ModelTypeError",
+    "Models",
+    "ModelsProtocolPrivate",
+    "ModerationObject",
+    "ModerationObjectResults",
+    "NumberType",
+    "object_to_json",
+    "OpenAIAssistantMessageParam",
+    "OpenAIChatCompletion",
+    "OpenAIChatCompletionChunk",
+    "OpenAIChatCompletionContentPartImageParam",
+    "OpenAIChatCompletionContentPartParam",
+    "OpenAIChatCompletionContentPartTextParam",
+    "OpenAIChatCompletionMessageContent",
+    "OpenAIChatCompletionRequestWithExtraBody",
+    "OpenAIChatCompletionTextOnlyMessageContent",
+    "OpenAIChatCompletionToolCall",
+    "OpenAIChatCompletionToolCallFunction",
+    "OpenAIChatCompletionUsage",
+    "OpenAIChatCompletionUsageCompletionTokensDetails",
+    "OpenAIChatCompletionUsagePromptTokensDetails",
+    "OpenAIChoice",
+    "OpenAIChoiceDelta",
+    "OpenAIChoiceLogprobs",
+    "OpenAIChunkChoice",
+    "OpenAICompletion",
+    "OpenAICompletionChoice",
+    "OpenAICompletionLogprobs",
+    "OpenAICompletionRequestWithExtraBody",
+    "OpenAICompletionWithInputMessages",
+    "OpenAICreateVectorStoreFileBatchRequestWithExtraBody",
+    "OpenAICreateVectorStoreRequestWithExtraBody",
+    "OpenAIDeleteResponseObject",
+    "OpenAIDeveloperMessageParam",
+    "OpenAIEmbeddingData",
+    "OpenAIEmbeddingUsage",
+    "OpenAIEmbeddingsRequestWithExtraBody",
+    "OpenAIEmbeddingsResponse",
+    "OpenAIFile",
+    "OpenAIFileDeleteResponse",
+    "OpenAIFileFile",
+    "OpenAIFileObject",
+    "OpenAIFilePurpose",
+    "OpenAIImageURL",
+    "OpenAIJSONSchema",
+    "OpenAIListModelsResponse",
+    "OpenAIMessageParam",
+    "OpenAIModel",
+    "Order",
+    "OpenAIResponseAnnotationCitation",
+    "OpenAIResponseAnnotationContainerFileCitation",
+    "OpenAIResponseAnnotationFileCitation",
+    "OpenAIResponseAnnotationFilePath",
+    "OpenAIResponseAnnotations",
+    "OpenAIResponseContentPart",
+    "OpenAIResponseContentPartOutputText",
+    "OpenAIResponseContentPartReasoningSummary",
+    "OpenAIResponseContentPartReasoningText",
+    "OpenAIResponseContentPartRefusal",
+    "OpenAIResponseError",
+    "OpenAIResponseFormatJSONObject",
+    "OpenAIResponseFormatJSONSchema",
+    "OpenAIResponseFormatParam",
+    "OpenAIResponseFormatText",
+    "OpenAIResponseInput",
+    "OpenAIResponseInputFunctionToolCallOutput",
+    "OpenAIResponseInputMessageContent",
+    "OpenAIResponseInputMessageContentFile",
+    "OpenAIResponseInputMessageContentImage",
+    "OpenAIResponseInputMessageContentText",
+    "OpenAIResponseInputTool",
+    "OpenAIResponseInputToolFileSearch",
+    "OpenAIResponseInputToolFunction",
+    "OpenAIResponseInputToolMCP",
+    "OpenAIResponseInputToolWebSearch",
+    "OpenAIResponseMCPApprovalRequest",
+    "OpenAIResponseMCPApprovalResponse",
+    "OpenAIResponseMessage",
+    "OpenAIResponseObject",
+    "OpenAIResponseObjectStream",
+    "OpenAIResponseObjectStreamResponseCompleted",
+    "OpenAIResponseObjectStreamResponseContentPartAdded",
+    "OpenAIResponseObjectStreamResponseContentPartDone",
+    "OpenAIResponseObjectStreamResponseCreated",
+    "OpenAIResponseObjectStreamResponseFailed",
+    "OpenAIResponseObjectStreamResponseFileSearchCallCompleted",
+    "OpenAIResponseObjectStreamResponseFileSearchCallInProgress",
+    "OpenAIResponseObjectStreamResponseFileSearchCallSearching",
+    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta",
+    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone",
+    "OpenAIResponseObjectStreamResponseInProgress",
+    "OpenAIResponseObjectStreamResponseIncomplete",
+    "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta",
+    "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone",
+    "OpenAIResponseObjectStreamResponseMcpCallCompleted",
+    "OpenAIResponseObjectStreamResponseMcpCallFailed",
+    "OpenAIResponseObjectStreamResponseMcpCallInProgress",
+    "OpenAIResponseObjectStreamResponseMcpListToolsCompleted",
+    "OpenAIResponseObjectStreamResponseMcpListToolsFailed",
+    "OpenAIResponseObjectStreamResponseMcpListToolsInProgress",
+    "OpenAIResponseObjectStreamResponseOutputItemAdded",
+    "OpenAIResponseObjectStreamResponseOutputItemDone",
+    "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded",
+    "OpenAIResponseObjectStreamResponseOutputTextDelta",
+    "OpenAIResponseObjectStreamResponseOutputTextDone",
+    "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded",
+    "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone",
+    "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta",
+    "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone",
+    "OpenAIResponseObjectStreamResponseReasoningTextDelta",
+    "OpenAIResponseObjectStreamResponseReasoningTextDone",
+    "OpenAIResponseObjectStreamResponseRefusalDelta",
+    "OpenAIResponseObjectStreamResponseRefusalDone",
+    "OpenAIResponseObjectStreamResponseWebSearchCallCompleted",
+    "OpenAIResponseObjectStreamResponseWebSearchCallInProgress",
+    "OpenAIResponseObjectStreamResponseWebSearchCallSearching",
+    "OpenAIResponseObjectWithInput",
+    "OpenAIResponseOutput",
+    "OpenAIResponseOutputMessageContent",
+    "OpenAIResponseOutputMessageContentOutputText",
+    "OpenAIResponseOutputMessageFileSearchToolCall",
+    "OpenAIResponseOutputMessageFileSearchToolCallResults",
+    "OpenAIResponseOutputMessageFunctionToolCall",
+    "OpenAIResponseOutputMessageMCPCall",
+    "OpenAIResponseOutputMessageMCPListTools",
+    "OpenAIResponseOutputMessageWebSearchToolCall",
+    "OpenAIResponsePrompt",
+    "OpenAIResponseText",
+    "OpenAIResponseTextFormat",
+    "OpenAIResponseTool",
+    "OpenAIResponseToolMCP",
+    "OpenAIResponseUsage",
+    "OpenAIResponseUsageInputTokensDetails",
+    "OpenAIResponseUsageOutputTokensDetails",
+    "OpenAISystemMessageParam",
+    "OpenAITokenLogProb",
+    "OpenAIToolMessageParam",
+    "OpenAITopLogProb",
+    "OpenAIUserMessageParam",
+    "OptimizerConfig",
+    "OptimizerType",
+    "PaginatedResponse",
+    "ParamType",
+    "parse_type",
+    "PostTraining",
+    "PostTrainingMetric",
+    "PostTrainingJob",
+    "PostTrainingJobArtifactsResponse",
+    "PostTrainingJobLogStream",
+    "PostTrainingJobStatusResponse",
+    "PostTrainingRLHFRequest",
+    "Prompt",
+    "Prompts",
+    "ProviderInfo",
+    "ProviderSpec",
+    "Providers",
+    "python_type_to_name",
+    "QATFinetuningConfig",
+    "QuantizationConfig",
+    "QuantizationType",
+    "QueryChunksResponse",
+    "RAGDocument",
+    "RAGQueryConfig",
+    "RAGQueryGenerator",
+    "RAGQueryGeneratorConfig",
+    "RAGQueryResult",
+    "RAGSearchMode",
+    "register_schema",
+    "RLHFAlgorithm",
+    "RRFRanker",
+    "Ranker",
+    "RegexParserScoringFnParams",
+    "RemoteProviderConfig",
+    "RemoteProviderSpec",
+    "RerankData",
+    "RerankResponse",
+    "Resource",
+    "ResourceNotFoundError",
+    "ResourceType",
+    "ResponseFormat",
+    "ResponseFormatType",
+    "ResponseGuardrail",
+    "ResponseGuardrailSpec",
+    "RouteInfo",
+    "RoutingTable",
+    "RowsDataSource",
+    "RunShieldResponse",
+    "Safety",
+    "SafetyViolation",
+    "SamplingParams",
+    "SamplingStrategy",
+    "ScoreBatchResponse",
+    "ScoreResponse",
+    "Scoring",
+    "ScoringFn",
+    "ScoringFnInput",
+    "ScoringFnParams",
+    "ScoringFnParamsType",
+    "ScoringFunctionStore",
+    "ScoringFunctions",
+    "ScoringFunctionsProtocolPrivate",
+    "ScoringResult",
+    "ScoringResultRow",
+    "Schema",
+    "SchemaOptions",
+    "SearchRankingOptions",
+    "Shield",
+    "ShieldInput",
+    "ShieldStore",
+    "Shields",
+    "ShieldsProtocolPrivate",
+    "SpecialToolGroup",
+    "StrictJsonType",
+    "StringType",
+    "SystemMessage",
+    "SystemMessageBehavior",
+    "TextContentItem",
+    "TextTruncation",
+    "TokenLogProbs",
+    "TokenValidationError",
+    "ToolChoice",
+    "ToolGroupNotFoundError",
+    "ToolDef",
+    "ToolGroup",
+    "ToolGroupInput",
+    "ToolGroups",
+    "ToolGroupsProtocolPrivate",
+    "ToolInvocationResult",
+    "ToolResponseMessage",
+    "ToolRuntime",
+    "ToolStore",
+    "TopKSamplingStrategy",
+    "TopPSamplingStrategy",
+    "TrainingConfig",
+    "UnsupportedModelError",
+    "unwrap_generic_list",
+    "unwrap_optional_type",
+    "unwrap_union_types",
+    "URIDataSource",
+    "URL",
+    # NOTE(review): underscore-prefixed (conventionally private) name listed as
+    # public here - confirm this export is intentional.
+    "_URLOrData",
+    "UserMessage",
+    "VectorIO",
+    "VectorStore",
+    "VectorStoreChunkingStrategy",
+    "VectorStoreChunkingStrategyAuto",
+    "VectorStoreChunkingStrategyStatic",
+    "VectorStoreChunkingStrategyStaticConfig",
+    "VectorStoreContent",
+    "VectorStoreCreateRequest",
+    "VectorStoreDeleteResponse",
+    "VectorStoreFileBatchObject",
+    "VectorStoreFileContentResponse",
+    "VectorStoreFileCounts",
+    "VectorStoreFileDeleteResponse",
+    "VectorStoreFileLastError",
+    "VectorStoreFileObject",
+    "VectorStoreFileStatus",
+    "VectorStoreFilesListInBatchResponse",
+    "VectorStoreInput",
+    "VectorStoreListFilesResponse",
+    "VectorStoreListResponse",
+    "VectorStoreModifyRequest",
+    "VectorStoreObject",
+    "VectorStoreSearchRequest",
+    "VectorStoreSearchResponse",
+    "VectorStoreSearchResponsePage",
+    "VectorStoreTable",
+    "VectorStoreNotFoundError",
+    "VectorStoresProtocolPrivate",
+    "VersionInfo",
+    "ViolationLevel",
+    "webmethod",
+    "WebMethod",
+    "WebSearchToolTypes",
+    "WeightedRanker",
+]
--- a/src/llama-stack-api/llama_stack_api/agents.py
+++ b/src/llama-stack-api/llama_stack_api/agents.py
@ -9,9 +9,9 @@ from typing import Annotated, Protocol, runtime_checkable

 from pydantic import BaseModel

-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import ExtraBodyField, json_schema_type, webmethod
+from llama_stack_api.common.responses import Order
+from llama_stack_api.schema_utils import ExtraBodyField, json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1

 from .openai_responses import (
    ListOpenAIResponseInputItem,
--- a/src/llama-stack-api/llama_stack_api/batches.py
+++ b/src/llama-stack-api/llama_stack_api/batches.py
@ -8,8 +8,8 @@ from typing import Literal, Protocol, runtime_checkable

 from pydantic import BaseModel, Field

-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1

 try:
    from openai.types import Batch as BatchObject
--- a/src/llama-stack-api/llama_stack_api/benchmarks.py
+++ b/src/llama-stack-api/llama_stack_api/benchmarks.py
@ -7,9 +7,9 @@ from typing import Any, Literal, Protocol, runtime_checkable

 from pydantic import BaseModel, Field

-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA


 class CommonBenchmarkFields(BaseModel):
--- a/src/llama-stack-api/llama_stack_api/common/init.py
+++ b/src/llama-stack-api/llama_stack_api/common/init.py
--- a/src/llama-stack-api/llama_stack_api/common/content_types.py
+++ b/src/llama-stack-api/llama_stack_api/common/content_types.py
@ -4,13 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from enum import Enum
 from typing import Annotated, Literal

 from pydantic import BaseModel, Field, model_validator

-from llama_stack.models.llama.datatypes import ToolCall
-from llama_stack.schema_utils import json_schema_type, register_schema
+from llama_stack_api.schema_utils import json_schema_type, register_schema


@json_schema_type
@ -101,43 +99,3 @@ class ImageDelta(BaseModel):

    type: Literal["image"] = "image"
    image: bytes
-
-
-class ToolCallParseStatus(Enum):
-    """Status of tool call parsing during streaming.
-    :cvar started: Tool call parsing has begun
-    :cvar in_progress: Tool call parsing is ongoing
-    :cvar failed: Tool call parsing failed
-    :cvar succeeded: Tool call parsing completed successfully
-    """
-
-    started = "started"
-    in_progress = "in_progress"
-    failed = "failed"
-    succeeded = "succeeded"
-
-
-@json_schema_type
-class ToolCallDelta(BaseModel):
-    """A tool call content delta for streaming responses.
-
-    :param type: Discriminator type of the delta. Always "tool_call"
-    :param tool_call: Either an in-progress tool call string or the final parsed tool call
-    :param parse_status: Current parsing status of the tool call
-    """
-
-    type: Literal["tool_call"] = "tool_call"
-
-    # you either send an in-progress tool call so the client can stream a long
-    # code generation or you send the final parsed tool call at the end of the
-    # stream
-    tool_call: str | ToolCall
-    parse_status: ToolCallParseStatus
-
-
-# streaming completions send a stream of ContentDeltas
-ContentDelta = Annotated[
-    TextDelta | ImageDelta | ToolCallDelta,
-    Field(discriminator="type"),
-]
-register_schema(ContentDelta, name="ContentDelta")
--- a/src/llama-stack-api/llama_stack_api/common/errors.py
+++ b/src/llama-stack-api/llama_stack_api/common/errors.py
--- a/src/llama-stack-api/llama_stack_api/common/job_types.py
+++ b/src/llama-stack-api/llama_stack_api/common/job_types.py
@ -7,7 +7,7 @@ from enum import Enum

 from pydantic import BaseModel

-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.schema_utils import json_schema_type


 class JobStatus(Enum):
--- a/src/llama-stack-api/llama_stack_api/common/responses.py
+++ b/src/llama-stack-api/llama_stack_api/common/responses.py
@ -9,7 +9,7 @@ from typing import Any

 from pydantic import BaseModel

-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.schema_utils import json_schema_type


 class Order(Enum):
--- a/src/llama-stack-api/llama_stack_api/common/tracing.py
+++ b/src/llama-stack-api/llama_stack_api/common/tracing.py
--- a/src/llama-stack-api/llama_stack_api/common/training_types.py
+++ b/src/llama-stack-api/llama_stack_api/common/training_types.py
@ -8,7 +8,7 @@ from datetime import datetime

 from pydantic import BaseModel

-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.schema_utils import json_schema_type


@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/common/type_system.py
+++ b/src/llama-stack-api/llama_stack_api/common/type_system.py
@ -8,7 +8,7 @@ from typing import Annotated, Literal

 from pydantic import BaseModel, Field

-from llama_stack.schema_utils import json_schema_type, register_schema
+from llama_stack_api.schema_utils import json_schema_type, register_schema


@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/conversations.py
+++ b/src/llama-stack-api/llama_stack_api/conversations.py
@ -9,7 +9,8 @@ from typing import Annotated, Literal, Protocol, runtime_checkable

 from pydantic import BaseModel, Field

-from llama_stack.apis.agents.openai_responses import (
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.openai_responses import (
    OpenAIResponseInputFunctionToolCallOutput,
    OpenAIResponseMCPApprovalRequest,
    OpenAIResponseMCPApprovalResponse,
@ -20,9 +21,8 @@ from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseOutputMessageMCPListTools,
    OpenAIResponseOutputMessageWebSearchToolCall,
 )
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1

 Metadata = dict[str, str]

--- a/src/llama-stack-api/llama_stack_api/datasetio.py
+++ b/src/llama-stack-api/llama_stack_api/datasetio.py
@ -6,10 +6,10 @@

 from typing import Any, Protocol, runtime_checkable

-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.datasets import Dataset
-from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
-from llama_stack.schema_utils import webmethod
+from llama_stack_api.common.responses import PaginatedResponse
+from llama_stack_api.datasets import Dataset
+from llama_stack_api.schema_utils import webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1BETA


 class DatasetStore(Protocol):
--- a/src/llama-stack-api/llama_stack_api/datasets.py
+++ b/src/llama-stack-api/llama_stack_api/datasets.py
@ -9,9 +9,9 @@ from typing import Annotated, Any, Literal, Protocol

 from pydantic import BaseModel, Field

-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1BETA


 class DatasetPurpose(StrEnum):
--- a/src/llama-stack-api/llama_stack_api/datatypes.py
+++ b/src/llama-stack-api/llama_stack_api/datatypes.py
@ -4,21 +4,172 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from enum import StrEnum
+from enum import Enum, EnumMeta, StrEnum
 from typing import Any, Protocol
 from urllib.parse import urlparse

 from pydantic import BaseModel, Field

-from llama_stack.apis.benchmarks import Benchmark
-from llama_stack.apis.datasets import Dataset
-from llama_stack.apis.datatypes import Api
-from llama_stack.apis.models import Model
-from llama_stack.apis.scoring_functions import ScoringFn
-from llama_stack.apis.shields import Shield
-from llama_stack.apis.tools import ToolGroup
-from llama_stack.apis.vector_stores import VectorStore
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.benchmarks import Benchmark
+from llama_stack_api.datasets import Dataset
+from llama_stack_api.models import Model
+from llama_stack_api.schema_utils import json_schema_type
+from llama_stack_api.scoring_functions import ScoringFn
+from llama_stack_api.shields import Shield
+from llama_stack_api.tools import ToolGroup
+from llama_stack_api.vector_stores import VectorStore
+
+
+class DynamicApiMeta(EnumMeta):
+    """Enum metaclass whose classes accept members registered after creation.
+
+    Members declared in the class body behave like ordinary enum members.
+    Further members are registered explicitly through :meth:`add`; calling the
+    class with an unknown value never creates a member implicitly.
+    """
+
+    def __new__(cls, name, bases, namespace):
+        """Create the enum class and attach the bookkeeping used by :meth:`add`."""
+        # Snapshot the public names declared in the class body.
+        original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}
+
+        # Create the enum class
+        enum_cls = super().__new__(cls, name, bases, namespace)
+
+        # Store the original values for reference
+        enum_cls._original_values = original_values
+        # value -> member, for every member registered later through add()
+        enum_cls._dynamic_values = {}
+
+        return enum_cls
+
+    def __call__(cls, value):
+        """Return the member for ``value``.
+
+        Resolution order: regular enum lookup by value, then members registered
+        through :meth:`add`, then a lookup by normalized member name (lower-cased,
+        ``-`` replaced with ``_``).
+
+        :raises ValueError: if ``value`` does not resolve to any member. This is
+            also raised for non-string values, which can never match a
+            dynamically registered or name-normalized member.
+        """
+        try:
+            return super().__call__(value)
+        except ValueError as e:
+            not_found = f"API '{value}' does not exist. Use Api.add() to register new APIs."
+
+            # The fallbacks below are string-only. Without this guard a non-string
+            # value would escape as AttributeError (no .lower()) or TypeError
+            # (unhashable dict key) instead of the ValueError enum callers expect.
+            if not isinstance(value, str):
+                raise ValueError(not_found) from e
+
+            # If this value was already dynamically added, return it
+            if value in cls._dynamic_values:
+                return cls._dynamic_values[value]
+
+            # Fall back to a lookup by normalized member name
+            member_name = value.lower().replace("-", "_")
+            if member_name in cls._member_map_:
+                return cls._member_map_[member_name]
+
+            # Never create members implicitly: force users to use Api.add() to
+            # register new APIs explicitly
+            raise ValueError(not_found) from e
+
+    def __iter__(cls):
+        """Iterate over static and dynamically added members, each exactly once."""
+        # add() appends dynamic members to _member_names_, so the base iterator
+        # already yields them; track names so they are not yielded a second time.
+        yielded = set()
+        for member in super().__iter__():
+            yielded.add(member._name_)
+            yield member
+
+        # Safety net for dynamic members the base iterator did not cover.
+        for member in getattr(cls, "_dynamic_values", {}).values():
+            if member._name_ not in yielded:
+                yielded.add(member._name_)
+                yield member
+
+    def add(cls, value):
+        """
+        Add a new API to the enum.
+        Used to register external APIs.
+
+        The member name is ``value`` lower-cased with ``-`` replaced by ``_``.
+        If a member with that name already exists it is returned unchanged.
+
+        :param value: the string value of the new member
+        :returns: the new (or already existing) member
+        """
+        member_name = value.lower().replace("-", "_")
+
+        # If this member name already exists in the enum, return it
+        if member_name in cls._member_map_:
+            return cls._member_map_[member_name]
+
+        # Create a new enum member
+        member = object.__new__(cls)
+        member._name_ = member_name
+        member._value_ = value
+
+        # Add it to the enum class
+        cls._member_map_[member_name] = member
+        cls._member_names_.append(member_name)
+        # NOTE(review): this rebinds the class-wide member type on every add();
+        # the reason is not evident from this file - confirm it is still needed.
+        cls._member_type_ = str
+
+        # Store it in our dynamic values; __call__ resolves dynamic members by
+        # value through this mapping.
+        cls._dynamic_values[value] = member
+
+        return member
+
+
+@json_schema_type
+class Api(Enum, metaclass=DynamicApiMeta):
+    """Enumeration of all available APIs in the Llama Stack system.
+    :cvar providers: Provider management and configuration
+    :cvar inference: Text generation, chat completions, and embeddings
+    :cvar safety: Content moderation and safety shields
+    :cvar agents: Agent orchestration and execution
+    :cvar batches: Batch processing for asynchronous API requests
+    :cvar vector_io: Vector database operations and queries
+    :cvar datasetio: Dataset input/output operations
+    :cvar scoring: Model output evaluation and scoring
+    :cvar eval: Model evaluation and benchmarking framework
+    :cvar post_training: Fine-tuning and model training
+    :cvar tool_runtime: Tool execution and management
+    :cvar models: Model metadata and management
+    :cvar shields: Safety shield implementations
+    :cvar vector_stores: Vector stores (only used for the routing table)
+    :cvar datasets: Dataset creation and management
+    :cvar scoring_functions: Scoring function definitions
+    :cvar benchmarks: Benchmark suite management
+    :cvar tool_groups: Tool group organization
+    :cvar files: File storage and management
+    :cvar prompts: Prompt versions and management
+    :cvar conversations: Conversations and conversation items
+    :cvar inspect: Built-in system inspection and introspection
+    """
+
+    # Each member's value is identical to its name. Additional members can be
+    # registered at runtime via Api.add(), provided by the DynamicApiMeta metaclass.
+    providers = "providers"
+    inference = "inference"
+    safety = "safety"
+    agents = "agents"
+    batches = "batches"
+    vector_io = "vector_io"
+    datasetio = "datasetio"
+    scoring = "scoring"
+    eval = "eval"
+    post_training = "post_training"
+    tool_runtime = "tool_runtime"
+
+    models = "models"
+    shields = "shields"
+    vector_stores = "vector_stores"  # only used for routing table
+    datasets = "datasets"
+    scoring_functions = "scoring_functions"
+    benchmarks = "benchmarks"
+    tool_groups = "tool_groups"
+    files = "files"
+    prompts = "prompts"
+    conversations = "conversations"
+
+    # built-in API
+    inspect = "inspect"
+
+
+@json_schema_type
+class Error(BaseModel):
+    """
+    Error response from the API. Roughly follows RFC 7807.
+
+    :param status: HTTP status code
+    :param title: Error title, a short summary of the error which is invariant for an error type
+    :param detail: Error detail, a longer human-readable description of the error
+    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
+    """
+
+    # Field names match the RFC 7807 "problem details" members of the same names;
+    # only `instance` is optional.
+    status: int
+    title: str
+    detail: str
+    instance: str | None = None
+
+
+class ExternalApiSpec(BaseModel):
+    """Specification for an external API implementation."""
+
+    # `module`, `name` and `protocol` are required (`...`); `pip_packages` is optional.
+    module: str = Field(..., description="Python module containing the API implementation")
+    name: str = Field(..., description="Name of the API")
+    # A literal list default is safe on a pydantic model: pydantic copies mutable
+    # defaults per instance instead of sharing one list between instances.
+    pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
+    protocol: str = Field(..., description="Name of the protocol class for the API")
+
+
+# Provider-related types (merged from providers/datatypes.py)
+# NOTE: no additional imports are required at this point; the resource types imported
+# at the top of this module (Model, Shield, Dataset, ...) are already in scope here


 class ModelsProtocolPrivate(Protocol):
--- a/src/llama-stack-api/llama_stack_api/eval.py
+++ b/src/llama-stack-api/llama_stack_api/eval.py
@ -8,12 +8,12 @@ from typing import Any, Literal, Protocol

 from pydantic import BaseModel, Field

-from llama_stack.apis.common.job_types import Job
-from llama_stack.apis.inference import SamplingParams, SystemMessage
-from llama_stack.apis.scoring import ScoringResult
-from llama_stack.apis.scoring_functions import ScoringFnParams
-from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.common.job_types import Job
+from llama_stack_api.inference import SamplingParams, SystemMessage
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.scoring import ScoringResult
+from llama_stack_api.scoring_functions import ScoringFnParams
+from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA


@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/files.py
+++ b/src/llama-stack-api/llama_stack_api/files.py
@ -10,10 +10,10 @@ from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable
 from fastapi import File, Form, Response, UploadFile
 from pydantic import BaseModel, Field

-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.common.responses import Order
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1


 # OpenAI Files API Models
--- a/src/llama-stack-api/llama_stack_api/inference.py
+++ b/src/llama-stack-api/llama_stack_api/inference.py
@ -18,14 +18,14 @@ from fastapi import Body
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict

-from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.common.responses import (
+from llama_stack_api.common.content_types import InterleavedContent
+from llama_stack_api.common.responses import (
    Order,
 )
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.models import Model
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.models import Model
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA


@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/inspect.py
+++ b/src/llama-stack-api/llama_stack_api/inspect.py
@ -8,11 +8,11 @@ from typing import Literal, Protocol, runtime_checkable

 from pydantic import BaseModel

-from llama_stack.apis.version import (
+from llama_stack_api.datatypes import HealthStatus
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import (
    LLAMA_STACK_API_V1,
 )
-from llama_stack.providers.datatypes import HealthStatus
-from llama_stack.schema_utils import json_schema_type, webmethod

 # Valid values for the route filter parameter.
 # Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated)
--- a/src/llama-stack-api/llama_stack_api/models.py
+++ b/src/llama-stack-api/llama_stack_api/models.py
@ -9,10 +9,10 @@ from typing import Any, Literal, Protocol, runtime_checkable

 from pydantic import BaseModel, ConfigDict, Field, field_validator

-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1


 class CommonModelFields(BaseModel):
--- a/src/llama-stack-api/llama_stack_api/openai_responses.py
+++ b/src/llama-stack-api/llama_stack_api/openai_responses.py
@ -10,8 +10,8 @@ from typing import Annotated, Any, Literal
 from pydantic import BaseModel, Field, model_validator
 from typing_extensions import TypedDict

-from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
-from llama_stack.schema_utils import json_schema_type, register_schema
+from llama_stack_api.schema_utils import json_schema_type, register_schema
+from llama_stack_api.vector_io import SearchRankingOptions as FileSearchRankingOptions

 # NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably
 # take their YAML and generate this file automatically. Their YAML is available.
--- a/src/llama-stack-api/llama_stack_api/post_training.py
+++ b/src/llama-stack-api/llama_stack_api/post_training.py
@ -10,11 +10,11 @@ from typing import Annotated, Any, Literal, Protocol

 from pydantic import BaseModel, Field

-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.job_types import JobStatus
-from llama_stack.apis.common.training_types import Checkpoint
-from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.common.content_types import URL
+from llama_stack_api.common.job_types import JobStatus
+from llama_stack_api.common.training_types import Checkpoint
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA


@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/prompts.py
+++ b/src/llama-stack-api/llama_stack_api/prompts.py
@ -10,9 +10,9 @@ from typing import Protocol, runtime_checkable

 from pydantic import BaseModel, Field, field_validator, model_validator

-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1


@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/providers.py
+++ b/src/llama-stack-api/llama_stack_api/providers.py
@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable

 from pydantic import BaseModel

-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.datatypes import HealthResponse
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.datatypes import HealthResponse
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1


@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/py.typed
+++ b/src/llama-stack-api/llama_stack_api/py.typed
--- a/src/llama-stack-api/llama_stack_api/rag_tool.py
+++ b/src/llama-stack-api/llama_stack_api/rag_tool.py
@ -9,7 +9,7 @@ from typing import Annotated, Any, Literal

 from pydantic import BaseModel, Field, field_validator

-from llama_stack.apis.common.content_types import URL, InterleavedContent
+from llama_stack_api.common.content_types import URL, InterleavedContent


 class RRFRanker(BaseModel):
--- a/src/llama-stack-api/llama_stack_api/resource.py
+++ b/src/llama-stack-api/llama_stack_api/resource.py
--- a/src/llama-stack-api/llama_stack_api/safety.py
+++ b/src/llama-stack-api/llama_stack_api/safety.py
@ -9,11 +9,11 @@ from typing import Any, Protocol, runtime_checkable

 from pydantic import BaseModel, Field

-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.shields import Shield
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.inference import OpenAIMessageParam
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.shields import Shield
+from llama_stack_api.version import LLAMA_STACK_API_V1


@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/schema_utils.py
+++ b/src/llama-stack-api/llama_stack_api/schema_utils.py
--- a/src/llama-stack-api/llama_stack_api/scoring.py
+++ b/src/llama-stack-api/llama_stack_api/scoring.py
@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable

 from pydantic import BaseModel

-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams
+from llama_stack_api.version import LLAMA_STACK_API_V1

 # mapping of metric to value
 ScoringResultRow = dict[str, Any]
--- a/src/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/src/llama_stack/apis/scoring_functions/scoring_functions.py
@ -16,10 +16,10 @@ from typing import (

 from pydantic import BaseModel, Field

-from llama_stack.apis.common.type_system import ParamType
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.common.type_system import ParamType
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1


 # Perhaps more structure can be imposed on these functions. Maybe they could be associated
--- a/src/llama-stack-api/llama_stack_api/shields.py
+++ b/src/llama-stack-api/llama_stack_api/shields.py
@ -8,10 +8,10 @@ from typing import Any, Literal, Protocol, runtime_checkable

 from pydantic import BaseModel

-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1


 class CommonShieldFields(BaseModel):
--- a/src/llama-stack-api/llama_stack_api/strong_typing/init.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/init.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/classdef.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/classdef.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/core.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/core.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/docstring.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/docstring.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/exception.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/exception.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/inspection.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/inspection.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/mapping.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/mapping.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/name.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/name.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/py.typed
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/py.typed
--- a/src/llama-stack-api/llama_stack_api/strong_typing/schema.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/schema.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/serialization.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/serialization.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/serializer.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/serializer.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/slots.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/slots.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/topological.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/topological.py
--- a/src/llama-stack-api/llama_stack_api/tools.py
+++ b/src/llama-stack-api/llama_stack_api/tools.py
@ -10,11 +10,11 @@ from typing import Any, Literal, Protocol
 from pydantic import BaseModel
 from typing_extensions import runtime_checkable

-from llama_stack.apis.common.content_types import URL, InterleavedContent
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.common.content_types import URL, InterleavedContent
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1


@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/vector_io.py
+++ b/src/llama-stack-api/llama_stack_api/vector_io.py
@ -13,12 +13,12 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
 from fastapi import Body, Query
 from pydantic import BaseModel, Field

-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.inference import InterleavedContent
-from llama_stack.apis.vector_stores import VectorStore
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
-from llama_stack.strong_typing.schema import register_schema
+from llama_stack_api.common.tracing import telemetry_traceable
+from llama_stack_api.inference import InterleavedContent
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.strong_typing.schema import register_schema
+from llama_stack_api.vector_stores import VectorStore
+from llama_stack_api.version import LLAMA_STACK_API_V1


@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/vector_stores.py
+++ b/src/llama-stack-api/llama_stack_api/vector_stores.py
@ -8,7 +8,7 @@ from typing import Literal

 from pydantic import BaseModel

-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType


 # Internal resource type for storing the vector store routing and other information
--- a/src/llama-stack-api/llama_stack_api/version.py
+++ b/src/llama-stack-api/llama_stack_api/version.py
--- a/src/llama-stack-api/pyproject.toml
+++ b/src/llama-stack-api/pyproject.toml
@ -0,0 +1,83 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[tool.uv]
+required-version = ">=0.7.0"
+
+[project]
+name = "llama-stack-api"
+version = "0.1.0"
+authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
+description = "API and Provider specifications for Llama Stack - lightweight package with protocol definitions and provider specs"
+readme = "README.md"
+requires-python = ">=3.12"
+license = { "text" = "MIT" }
+classifiers = [
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Operating System :: OS Independent",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Information Technology",
+    "Intended Audience :: Science/Research",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Scientific/Engineering :: Information Analysis",
+]
+dependencies = [
+    "pydantic>=2.11.9",
+    "jsonschema",
+    "fastapi", # imported directly by llama_stack_api.inference (Body) and llama_stack_api.vector_io (Body, Query)
+    "opentelemetry-sdk>=1.30.0",
+    "opentelemetry-exporter-otlp-proto-http>=1.30.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/llamastack/llama-stack"
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["llama_stack_api", "llama_stack_api.*"]
+
+[tool.setuptools.package-data]
+llama_stack_api = ["py.typed"]
+
+[tool.ruff]
+line-length = 120
+
+[tool.ruff.lint]
+select = [
+    "UP",      # pyupgrade
+    "B",       # flake8-bugbear
+    "B9",      # flake8-bugbear subset
+    "C",       # comprehensions
+    "E",       # pycodestyle
+    "F",       # Pyflakes
+    "N",       # Naming
+    "W",       # Warnings
+    "DTZ",     # datetime rules
+    "I",       # isort (imports order)
+    "RUF001",  # Checks for ambiguous Unicode characters in strings
+    "RUF002",  # Checks for ambiguous Unicode characters in docstrings
+    "RUF003",  # Checks for ambiguous Unicode characters in comments
+    "PLC2401", # Checks for the use of non-ASCII characters in variable names
+]
+ignore = [
+    # The following ignores are desired by the project maintainers.
+    "E402",   # Module level import not at top of file
+    "E501",   # Line too long
+    "F405",   # Maybe undefined or defined from star import
+    "C408",   # Ignored because we like the dict keyword argument syntax
+    "N812",   # Ignored because import torch.nn.functional as F is PyTorch convention
+
+    # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
+    "C901",   # Complexity of the function is too high
+]
+unfixable = [
+    "PLE2515",
+] # Do not fix this automatically since ruff will replace the zero-width space with \u200b - let's do it manually
+
+[tool.ruff.lint.per-file-ignores]
+"llama_stack_api/**/__init__.py" = ["F403"] # modules live directly under llama_stack_api/; there is no apis/ subpackage in this distribution
+
+[tool.ruff.lint.pep8-naming]
+classmethod-decorators = ["classmethod", "pydantic.field_validator"]
--- a/src/llama_stack/apis/agents/init.py
+++ b/src/llama_stack/apis/agents/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .agents import *
--- a/src/llama_stack/apis/batches/init.py
+++ b/src/llama_stack/apis/batches/init.py
@ -1,9 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .batches import Batches, BatchObject, ListBatchesResponse
-
-__all__ = ["Batches", "BatchObject", "ListBatchesResponse"]
--- a/src/llama_stack/apis/benchmarks/init.py
+++ b/src/llama_stack/apis/benchmarks/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .benchmarks import *
--- a/src/llama_stack/apis/common/init.py
+++ b/src/llama_stack/apis/common/init.py
@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
--- a/src/llama_stack/apis/conversations/init.py
+++ b/src/llama_stack/apis/conversations/init.py
@ -1,27 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .conversations import (
-    Conversation,
-    ConversationDeletedResource,
-    ConversationItem,
-    ConversationItemCreateRequest,
-    ConversationItemDeletedResource,
-    ConversationItemList,
-    Conversations,
-    Metadata,
-)
-
-__all__ = [
-    "Conversation",
-    "ConversationDeletedResource",
-    "ConversationItem",
-    "ConversationItemCreateRequest",
-    "ConversationItemDeletedResource",
-    "ConversationItemList",
-    "Conversations",
-    "Metadata",
-]
--- a/src/llama_stack/apis/datasetio/init.py
+++ b/src/llama_stack/apis/datasetio/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .datasetio import *
--- a/src/llama_stack/apis/datasets/init.py
+++ b/src/llama_stack/apis/datasets/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .datasets import *
--- a/src/llama_stack/apis/datatypes.py
+++ b/src/llama_stack/apis/datatypes.py
@ -1,158 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from enum import Enum, EnumMeta
-
-from pydantic import BaseModel, Field
-
-from llama_stack.schema_utils import json_schema_type
-
-
-class DynamicApiMeta(EnumMeta):
-    def __new__(cls, name, bases, namespace):
-        # Store the original enum values
-        original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}
-
-        # Create the enum class
-        cls = super().__new__(cls, name, bases, namespace)
-
-        # Store the original values for reference
-        cls._original_values = original_values
-        # Initialize _dynamic_values
-        cls._dynamic_values = {}
-
-        return cls
-
-    def __call__(cls, value):
-        try:
-            return super().__call__(value)
-        except ValueError as e:
-            # If this value was already dynamically added, return it
-            if value in cls._dynamic_values:
-                return cls._dynamic_values[value]
-
-            # If the value doesn't exist, create a new enum member
-            # Create a new member name from the value
-            member_name = value.lower().replace("-", "_")
-
-            # If this member name already exists in the enum, return the existing member
-            if member_name in cls._member_map_:
-                return cls._member_map_[member_name]
-
-            # Instead of creating a new member, raise ValueError to force users to use Api.add() to
-            # register new APIs explicitly
-            raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e
-
-    def __iter__(cls):
-        # Allow iteration over both static and dynamic members
-        yield from super().__iter__()
-        if hasattr(cls, "_dynamic_values"):
-            yield from cls._dynamic_values.values()
-
-    def add(cls, value):
-        """
-        Add a new API to the enum.
-        Used to register external APIs.
-        """
-        member_name = value.lower().replace("-", "_")
-
-        # If this member name already exists in the enum, return it
-        if member_name in cls._member_map_:
-            return cls._member_map_[member_name]
-
-        # Create a new enum member
-        member = object.__new__(cls)
-        member._name_ = member_name
-        member._value_ = value
-
-        # Add it to the enum class
-        cls._member_map_[member_name] = member
-        cls._member_names_.append(member_name)
-        cls._member_type_ = str
-
-        # Store it in our dynamic values
-        cls._dynamic_values[value] = member
-
-        return member
-
-
-@json_schema_type
-class Api(Enum, metaclass=DynamicApiMeta):
-    """Enumeration of all available APIs in the Llama Stack system.
-    :cvar providers: Provider management and configuration
-    :cvar inference: Text generation, chat completions, and embeddings
-    :cvar safety: Content moderation and safety shields
-    :cvar agents: Agent orchestration and execution
-    :cvar batches: Batch processing for asynchronous API requests
-    :cvar vector_io: Vector database operations and queries
-    :cvar datasetio: Dataset input/output operations
-    :cvar scoring: Model output evaluation and scoring
-    :cvar eval: Model evaluation and benchmarking framework
-    :cvar post_training: Fine-tuning and model training
-    :cvar tool_runtime: Tool execution and management
-    :cvar telemetry: Observability and system monitoring
-    :cvar models: Model metadata and management
-    :cvar shields: Safety shield implementations
-    :cvar datasets: Dataset creation and management
-    :cvar scoring_functions: Scoring function definitions
-    :cvar benchmarks: Benchmark suite management
-    :cvar tool_groups: Tool group organization
-    :cvar files: File storage and management
-    :cvar prompts: Prompt versions and management
-    :cvar inspect: Built-in system inspection and introspection
-    """
-
-    providers = "providers"
-    inference = "inference"
-    safety = "safety"
-    agents = "agents"
-    batches = "batches"
-    vector_io = "vector_io"
-    datasetio = "datasetio"
-    scoring = "scoring"
-    eval = "eval"
-    post_training = "post_training"
-    tool_runtime = "tool_runtime"
-
-    models = "models"
-    shields = "shields"
-    vector_stores = "vector_stores"  # only used for routing table
-    datasets = "datasets"
-    scoring_functions = "scoring_functions"
-    benchmarks = "benchmarks"
-    tool_groups = "tool_groups"
-    files = "files"
-    prompts = "prompts"
-    conversations = "conversations"
-
-    # built-in API
-    inspect = "inspect"
-
-
-@json_schema_type
-class Error(BaseModel):
-    """
-    Error response from the API. Roughly follows RFC 7807.
-
-    :param status: HTTP status code
-    :param title: Error title, a short summary of the error which is invariant for an error type
-    :param detail: Error detail, a longer human-readable description of the error
-    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
-    """
-
-    status: int
-    title: str
-    detail: str
-    instance: str | None = None
-
-
-class ExternalApiSpec(BaseModel):
-    """Specification for an external API implementation."""
-
-    module: str = Field(..., description="Python module containing the API implementation")
-    name: str = Field(..., description="Name of the API")
-    pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
-    protocol: str = Field(..., description="Name of the protocol class for the API")
--- a/src/llama_stack/apis/eval/init.py
+++ b/src/llama_stack/apis/eval/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .eval import *
--- a/src/llama_stack/apis/files/init.py
+++ b/src/llama_stack/apis/files/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .files import *
--- a/src/llama_stack/apis/inference/init.py
+++ b/src/llama_stack/apis/inference/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .inference import *
--- a/src/llama_stack/apis/inspect/init.py
+++ b/src/llama_stack/apis/inspect/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .inspect import *
--- a/src/llama_stack/apis/models/init.py
+++ b/src/llama_stack/apis/models/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .models import *
--- a/src/llama_stack/apis/post_training/init.py
+++ b/src/llama_stack/apis/post_training/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .post_training import *
--- a/src/llama_stack/apis/prompts/init.py
+++ b/src/llama_stack/apis/prompts/init.py
@ -1,9 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .prompts import ListPromptsResponse, Prompt, Prompts
-
-__all__ = ["Prompt", "Prompts", "ListPromptsResponse"]
--- a/src/llama_stack/apis/providers/init.py
+++ b/src/llama_stack/apis/providers/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .providers import *
--- a/src/llama_stack/apis/safety/init.py
+++ b/src/llama_stack/apis/safety/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .safety import *
--- a/src/llama_stack/apis/scoring/init.py
+++ b/src/llama_stack/apis/scoring/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .scoring import *
--- a/src/llama_stack/apis/scoring_functions/init.py
+++ b/src/llama_stack/apis/scoring_functions/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .scoring_functions import *
--- a/src/llama_stack/apis/shields/init.py
+++ b/src/llama_stack/apis/shields/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .shields import *
--- a/src/llama_stack/apis/tools/init.py
+++ b/src/llama_stack/apis/tools/init.py
@ -1,8 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .rag_tool import *
-from .tools import *
--- a/src/llama_stack/apis/vector_io/init.py
+++ b/src/llama_stack/apis/vector_io/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .vector_io import *
--- a/src/llama_stack/apis/vector_stores/init.py
+++ b/src/llama_stack/apis/vector_stores/init.py
@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .vector_stores import *
--- a/src/llama_stack/cli/stack/_list_deps.py
+++ b/src/llama_stack/cli/stack/_list_deps.py
@ -9,6 +9,7 @@ import sys
 from pathlib import Path

 import yaml
+from llama_stack_api import Api
 from termcolor import cprint

 from llama_stack.cli.stack.utils import ImageType
@ -21,7 +22,6 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.stack import replace_env_vars
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api

 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"

--- a/src/llama_stack/cli/stack/utils.py
+++ b/src/llama_stack/cli/stack/utils.py
@ -11,6 +11,7 @@ from functools import lru_cache
 from pathlib import Path

 import yaml
+from llama_stack_api import Api
 from termcolor import cprint

 from llama_stack.core.datatypes import (
@ -32,7 +33,6 @@ from llama_stack.core.storage.datatypes import (
 from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.image_types import LlamaStackImageType
-from llama_stack.providers.datatypes import Api

 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"

--- a/src/llama_stack/core/build.py
+++ b/src/llama_stack/core/build.py
@ -6,6 +6,7 @@

 import sys

+from llama_stack_api import Api
 from pydantic import BaseModel
 from termcolor import cprint

@ -13,7 +14,6 @@ from llama_stack.core.datatypes import BuildConfig
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.distributions.template import DistributionTemplate
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api

 log = get_logger(name=__name__, category="core")

--- a/src/llama_stack/core/client.py
+++ b/src/llama_stack/core/client.py
@ -12,11 +12,10 @@ from enum import Enum
 from typing import Any, Union, get_args, get_origin

 import httpx
+from llama_stack_api import RemoteProviderConfig
 from pydantic import BaseModel, parse_obj_as
 from termcolor import cprint

-from llama_stack.providers.datatypes import RemoteProviderConfig
-
 _CLIENT_CLASSES = {}


--- a/src/llama_stack/core/configure.py
+++ b/src/llama_stack/core/configure.py
@ -6,6 +6,8 @@
 import textwrap
 from typing import Any

+from llama_stack_api import Api, ProviderSpec
+
 from llama_stack.core.datatypes import (
    LLAMA_STACK_RUN_CONFIG_VERSION,
    DistributionSpec,
@ -20,7 +22,6 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.prompt_for_config import prompt_for_config
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api, ProviderSpec

 logger = get_logger(name=__name__, category="core")

--- a/src/llama_stack/core/conversations/conversations.py
+++ b/src/llama_stack/core/conversations/conversations.py
@ -8,9 +8,7 @@ import secrets
 import time
 from typing import Any, Literal

-from pydantic import BaseModel, TypeAdapter
-
-from llama_stack.apis.conversations.conversations import (
+from llama_stack_api import (
    Conversation,
    ConversationDeletedResource,
    ConversationItem,
@ -20,6 +18,8 @@ from llama_stack.apis.conversations.conversations import (
    Conversations,
    Metadata,
 )
+from pydantic import BaseModel, TypeAdapter
+
 from llama_stack.core.datatypes import AccessRule, StackRunConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
--- a/src/llama_stack/core/datatypes.py
+++ b/src/llama_stack/core/datatypes.py
@ -9,22 +9,34 @@ from pathlib import Path
 from typing import Annotated, Any, Literal, Self
 from urllib.parse import urlparse

+from llama_stack_api import (
+    Api,
+    Benchmark,
+    BenchmarkInput,
+    Dataset,
+    DatasetInput,
+    DatasetIO,
+    Eval,
+    Inference,
+    Model,
+    ModelInput,
+    ProviderSpec,
+    Resource,
+    Safety,
+    Scoring,
+    ScoringFn,
+    ScoringFnInput,
+    Shield,
+    ShieldInput,
+    ToolGroup,
+    ToolGroupInput,
+    ToolRuntime,
+    VectorIO,
+    VectorStore,
+    VectorStoreInput,
+)
 from pydantic import BaseModel, Field, field_validator, model_validator

-from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Dataset, DatasetInput
-from llama_stack.apis.eval import Eval
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.models import Model, ModelInput
-from llama_stack.apis.resource import Resource
-from llama_stack.apis.safety import Safety
-from llama_stack.apis.scoring import Scoring
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
-from llama_stack.apis.shields import Shield, ShieldInput
-from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
-from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
 from llama_stack.core.access_control.datatypes import AccessRule
 from llama_stack.core.storage.datatypes import (
    KVStoreReference,
@ -32,7 +44,6 @@ from llama_stack.core.storage.datatypes import (
    StorageConfig,
 )
 from llama_stack.log import LoggingConfig
-from llama_stack.providers.datatypes import Api, ProviderSpec

 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
 LLAMA_STACK_RUN_CONFIG_VERSION = 2
--- a/src/llama_stack/core/distribution.py
+++ b/src/llama_stack/core/distribution.py
@ -10,17 +10,17 @@ import os
 from typing import Any

 import yaml
-from pydantic import BaseModel
-
-from llama_stack.core.datatypes import BuildConfig, DistributionSpec
-from llama_stack.core.external import load_external_apis
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
    Api,
    InlineProviderSpec,
    ProviderSpec,
    RemoteProviderSpec,
 )
+from pydantic import BaseModel
+
+from llama_stack.core.datatypes import BuildConfig, DistributionSpec
+from llama_stack.core.external import load_external_apis
+from llama_stack.log import get_logger

 logger = get_logger(name=__name__, category="core")

--- a/Show more
+++ b/Show more