feat: split API and provider specs into separate llama-stack-api pkg (#3895)

# What does this PR do? Extract API definitions and provider specifications into a standalone llama-stack-api package that can be published to PyPI independently of the main llama-stack server. See: https://github.com/llamastack/llama-stack/pull/2978 and https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942 Motivation: External providers currently import from llama-stack, which overrides the installed version and causes dependency conflicts. This separation allows external providers to: - Install only the type definitions they need without server dependencies - Avoid version conflicts with the installed llama-stack package - Be versioned and released independently This enables us to re-enable external provider module tests that were previously blocked by these import conflicts. Changes: - Created llama-stack-api package with minimal dependencies (pydantic, jsonschema) - Moved APIs, provider datatypes, strong_typing, and schema_utils - Updated all imports from llama_stack.* to llama_stack_api.* - Configured local editable install for development workflow - Updated linting and type-checking configuration for both packages Next Steps: - Publish llama-stack-api to PyPI - Update external provider dependencies - Re-enable external provider module tests Precursor PRs to this one: - #4093 - #3954 - #4064 These PRs moved key pieces _out_ of the API package, limiting the scope of change here. Relates to #3237 ## Test Plan Package builds successfully and can be imported independently. All pre-commit hooks pass with expected exclusions maintained. --------- Signed-off-by: Charlie Doern <cdoern@redhat.com>
2025-12-03 09:53:45 +00:00 · 2025-11-13 14:51:17 -05:00 · 2025-11-13 14:51:17 -05:00 · 840ad75fe9
commit 840ad75fe9
parent ceb716b9a0
358 changed files with 2337 additions and 1424 deletions
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@ -30,13 +30,16 @@ jobs:
        activate-environment: true
        version: 0.7.6
-    - name: Build Llama Stack package
+    - name: Build Llama Stack API package
-      run: |
+      working-directory: src/llama-stack-api
-        uv build
+      run: uv build
-    - name: Install Llama Stack package
+    - name: Build Llama Stack package
      run: uv build
    - name: Install Llama Stack package (with api stubs from local build)
      run: |
-        uv pip install dist/*.whl
+        uv pip install --find-links src/llama-stack-api/dist dist/*.whl
    - name: Verify Llama Stack package
      run: |
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -42,7 +42,7 @@ repos:
    hooks:
    -   id: ruff
        args: [ --fix ]
-        exclude: ^src/llama_stack/strong_typing/.*$
+        exclude: ^(src/llama_stack/strong_typing/.*|src/llama-stack-api/llama_stack_api/strong_typing/.*)$
    -   id: ruff-format
 -   repo: https://github.com/adamchainz/blacken-docs
--- a/docs/docs/concepts/apis/external.mdx
+++ b/docs/docs/concepts/apis/external.mdx
@ -58,7 +58,7 @@ External APIs must expose a `available_providers()` function in their module tha
 ```python
 # llama_stack_api_weather/api.py
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec
 def available_providers() -> list[ProviderSpec]:
@ -79,7 +79,7 @@ A Protocol class like so:
 # llama_stack_api_weather/api.py
 from typing import Protocol
-from llama_stack.schema_utils import webmethod
+from llama_stack_api import webmethod
 class WeatherAPI(Protocol):
@ -151,13 +151,12 @@ __all__ = ["WeatherAPI", "available_providers"]
 # llama-stack-api-weather/src/llama_stack_api_weather/weather.py
 from typing import Protocol
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
    Api,
    ProviderSpec,
    RemoteProviderSpec,
    webmethod,
 )
 from llama_stack.schema_utils import webmethod
 def available_providers() -> list[ProviderSpec]:
    return [
--- a/docs/docs/distributions/building_distro.mdx
+++ b/docs/docs/distributions/building_distro.mdx
@ -65,7 +65,7 @@ external_providers_dir: /workspace/providers.d
 Inside `providers.d/custom_ollama/provider.py`, define `get_provider_spec()` so the CLI can discover dependencies:
 ```python
-from llama_stack.providers.datatypes import ProviderSpec
+from llama_stack_api.providers.datatypes import ProviderSpec
 def get_provider_spec() -> ProviderSpec:
--- a/docs/docs/providers/external/external-providers-guide.mdx
+++ b/docs/docs/providers/external/external-providers-guide.mdx
@ -80,7 +80,7 @@ container_image: custom-vector-store:latest  # optional
 All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class. The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:
 ```python
-from llama_stack.providers.datatypes import (
+from llama_stack_api.providers.datatypes import (
    ProviderSpec,
    Api,
    RemoteProviderSpec,
--- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
+++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
@ -153,7 +153,7 @@ description: |
  Example using RAGQueryConfig with different search modes:
  ```python
-  from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+  from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker
  # Vector search
  config = RAGQueryConfig(mode="vector", max_chunks=5)
@ -358,7 +358,7 @@ Two ranker types are supported:
 Example using RAGQueryConfig with different search modes:
 ```python
-from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker
 # Vector search
 config = RAGQueryConfig(mode="vector", max_chunks=5)
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@ -16,7 +16,7 @@ import sys
 import fire
 import ruamel.yaml as yaml
-from llama_stack.apis.version import LLAMA_STACK_API_V1 # noqa: E402
+from llama_stack_api import LLAMA_STACK_API_V1 # noqa: E402
 from llama_stack.core.stack import LlamaStack  # noqa: E402
 from .pyopenapi.options import Options  # noqa: E402
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@ -16,27 +16,27 @@ from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union
 from fastapi import UploadFile
-from llama_stack.apis.datatypes import Error
+from llama_stack_api import (
-from llama_stack.strong_typing.core import JsonType
+    Docstring,
-from llama_stack.strong_typing.docstring import Docstring, parse_type
+    Error,
-from llama_stack.strong_typing.inspection import (
+    JsonSchemaGenerator,
    JsonType,
    Schema,
    SchemaOptions,
    get_schema_identifier,
    is_generic_list,
    is_type_optional,
    is_type_union,
    is_unwrapped_body_param,
    json_dump_string,
    object_to_json,
    parse_type,
    python_type_to_name,
    register_schema,
    unwrap_generic_list,
    unwrap_optional_type,
    unwrap_union_types,
 )
 from llama_stack.strong_typing.name import python_type_to_name
 from llama_stack.strong_typing.schema import (
    get_schema_identifier,
    JsonSchemaGenerator,
    register_schema,
    Schema,
    SchemaOptions,
 )
 from llama_stack.strong_typing.serialization import json_dump_string, object_to_json
 from pydantic import BaseModel
 from .operations import (
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@ -11,19 +11,21 @@ import typing
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
 from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
 from termcolor import colored
 from llama_stack.strong_typing.inspection import get_signature
 from typing import get_origin, get_args
 from fastapi import UploadFile
 from fastapi.params import File, Form
 from typing import Annotated
-from llama_stack.schema_utils import ExtraBodyField
+from llama_stack_api import (
    ExtraBodyField,
    LLAMA_STACK_API_V1,
    LLAMA_STACK_API_V1ALPHA,
    LLAMA_STACK_API_V1BETA,
    get_signature,
 )
 def split_prefix(
--- a/docs/openapi_generator/pyopenapi/specification.py
+++ b/docs/openapi_generator/pyopenapi/specification.py
@ -9,7 +9,7 @@ import enum
 from dataclasses import dataclass
 from typing import Any, ClassVar, Dict, List, Optional, Union
-from llama_stack.strong_typing.schema import JsonType, Schema, StrictJsonType
+from llama_stack_api import JsonType, Schema, StrictJsonType
 URL = str
--- a/docs/openapi_generator/pyopenapi/utility.py
+++ b/docs/openapi_generator/pyopenapi/utility.py
@ -11,8 +11,7 @@ from pathlib import Path
 from typing import Any, List, Optional, TextIO, Union, get_type_hints, get_origin, get_args
 from pydantic import BaseModel
-from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
+from llama_stack_api import StrictJsonType, is_unwrapped_body_param, object_to_json
 from llama_stack.strong_typing.inspection import is_unwrapped_body_param
 from llama_stack.core.resolver import api_protocol_map
 from .generator import Generator
--- a/pyproject.toml
+++ b/pyproject.toml
@ -31,6 +31,7 @@ dependencies = [
    "httpx",
    "jinja2>=3.1.6",
    "jsonschema",
    "llama-stack-api",  # API and provider specifications (local dev via tool.uv.sources)
    "openai>=2.5.0",
    "prompt-toolkit",
    "python-dotenv",
@ -180,7 +181,7 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p
 [tool.setuptools.packages.find]
 where = ["src"]
-include = ["llama_stack", "llama_stack.*"]
+include = ["llama_stack", "llama_stack.*", "llama-stack-api", "llama-stack-api.*"]
 [[tool.uv.index]]
 name = "pytorch-cpu"
@ -190,6 +191,7 @@ explicit = true
 [tool.uv.sources]
 torch = [{ index = "pytorch-cpu" }]
 torchvision = [{ index = "pytorch-cpu" }]
 llama-stack-api = [{ path = "src/llama-stack-api", editable = true }]
 [tool.ruff]
 line-length = 120
@ -256,8 +258,8 @@ unfixable = [
 ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API
 [tool.mypy]
-mypy_path = ["src"]
+mypy_path = ["src", "src/llama-stack-api"]
-packages = ["llama_stack"]
+packages = ["llama_stack", "llama_stack_api"]
 plugins = ['pydantic.mypy']
 disable_error_code = []
 warn_return_any = true
@ -279,15 +281,18 @@ exclude = [
    "^src/llama_stack/core/store/registry\\.py$",
    "^src/llama_stack/core/utils/exec\\.py$",
    "^src/llama_stack/core/utils/prompt_for_config\\.py$",
    # Moved to llama-stack-api but still excluded
    "^src/llama_stack/models/llama/llama3/interface\\.py$",
    "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
    "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
    "^src/llama_stack/providers/inline/datasetio/localfs/",
    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
    "^src/llama_stack/models/llama/llama3/generation\\.py$",
    "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
    "^src/llama_stack/models/llama/llama4/",
    "^src/llama-stack-api/llama_stack_api/core/telemetry/telemetry\\.py$",
    "^src/llama_stack/providers/inline/agents/meta_reference/",
    "^src/llama_stack/providers/inline/datasetio/localfs/",
    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
    "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
    "^src/llama_stack/providers/inline/post_training/common/validator\\.py$",
    "^src/llama_stack/providers/inline/safety/code_scanner/",
@ -337,7 +342,9 @@ exclude = [
    "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
    "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
    "^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
-    "^src/llama_stack/strong_typing/auxiliary\\.py$",
+    "^src/llama-stack-api/llama_stack_api/core/telemetry/trace_protocol\\.py$",
    "^src/llama-stack-api/llama_stack_api/core/telemetry/tracing\\.py$",
    "^src/llama-stack-api/llama_stack_api/strong_typing/auxiliary\\.py$",
    "^src/llama_stack/distributions/template\\.py$",
 ]
--- a/scripts/generate_prompt_format.py
+++ b/scripts/generate_prompt_format.py
@ -14,8 +14,8 @@ import os
 from pathlib import Path
 import fire
 from llama_stack_api import ModelNotFoundError
 from llama_stack.apis.common.errors import ModelNotFoundError
 from llama_stack.models.llama.llama3.generation import Llama3
 from llama_stack.models.llama.llama4.generation import Llama4
 from llama_stack.models.llama.sku_list import resolve_model
--- a/scripts/provider_codegen.py
+++ b/scripts/provider_codegen.py
@ -22,7 +22,7 @@ def get_api_docstring(api_name: str) -> str | None:
    """Extract docstring from the API protocol class."""
    try:
        # Import the API module dynamically
-        api_module = __import__(f"llama_stack.apis.{api_name}", fromlist=[api_name.title()])
+        api_module = __import__(f"llama_stack_api.{api_name}", fromlist=[api_name.title()])
        # Get the main protocol class (usually capitalized API name)
        protocol_class_name = api_name.title()
@ -83,8 +83,9 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]:
                # this string replace is ridiculous
                field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "")
                field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "")
-                field_type = field_type.replace("llama_stack.apis.inference.inference.", "")
+                field_type = field_type.replace("llama_stack_api.inference.", "")
                field_type = field_type.replace("llama_stack.providers.", "")
                field_type = field_type.replace("llama_stack_api.datatypes.", "")
                default_value = field.default
                if field.default_factory is not None:
--- a/src/llama-stack-api/README.md
+++ b/src/llama-stack-api/README.md
@ -0,0 +1,103 @@
 # llama-stack-api
 API and Provider specifications for Llama Stack - a lightweight package with protocol definitions and provider specs.
 ## Overview
 `llama-stack-api` is a minimal dependency package that contains:
 - **API Protocol Definitions**: Type-safe protocol definitions for all Llama Stack APIs (inference, agents, safety, etc.)
 - **Provider Specifications**: Provider spec definitions for building custom providers
 - **Data Types**: Shared data types and models used across the Llama Stack ecosystem
 - **Type Utilities**: Strong typing utilities and schema validation
 ## What This Package Does NOT Include
 - Server implementation (see `llama-stack` package)
 - Provider implementations (see `llama-stack` package)
 - CLI tools (see `llama-stack` package)
 - Runtime orchestration (see `llama-stack` package)
 ## Use Cases
 This package is designed for:
 1. **Third-party Provider Developers**: Build custom providers without depending on the full Llama Stack server
 2. **Client Library Authors**: Use type definitions without server dependencies
 3. **Documentation Generation**: Generate API docs from protocol definitions
 4. **Type Checking**: Validate implementations against the official specs
 ## Installation
 ```bash
 pip install llama-stack-api
 ```
 Or with uv:
 ```bash
 uv pip install llama-stack-api
 ```
 ## Dependencies
 Minimal dependencies:
 - `pydantic>=2.11.9` - For data validation and serialization
 - `jsonschema` - For JSON schema utilities
 ## Versioning
 This package follows semantic versioning independently from the main `llama-stack` package:
 - **Patch versions** (0.1.x): Documentation, internal improvements
 - **Minor versions** (0.x.0): New APIs, backward-compatible changes
 - **Major versions** (x.0.0): Breaking changes to existing APIs
 Current version: **0.4.0**
 ## Usage Example
 ```python
 from llama_stack_api.inference import Inference, ChatCompletionRequest
 from llama_stack_api.providers.datatypes import ProviderSpec, InlineProviderSpec
 from llama_stack_api.datatypes import Api
 # Use protocol definitions for type checking
 class MyInferenceProvider(Inference):
    async def chat_completion(self, request: ChatCompletionRequest):
        # Your implementation
        pass
 # Define provider specifications
 my_provider_spec = InlineProviderSpec(
    api=Api.inference,
    provider_type="inline::my-provider",
    pip_packages=["my-dependencies"],
    module="my_package.providers.inference",
    config_class="my_package.providers.inference.MyConfig",
 )
 ```
 ## Relationship to llama-stack
 The main `llama-stack` package depends on `llama-stack-api` and provides:
 - Full server implementation
 - Built-in provider implementations
 - CLI tools for running and managing stacks
 - Runtime provider resolution and orchestration
 ## Contributing
 See the main [Llama Stack repository](https://github.com/llamastack/llama-stack) for contribution guidelines.
 ## License
 MIT License - see LICENSE file for details.
 ## Links
 - [Main Llama Stack Repository](https://github.com/llamastack/llama-stack)
 - [Documentation](https://llamastack.ai/)
 - [Client Library](https://pypi.org/project/llama-stack-client/)
--- a/src/llama-stack-api/llama_stack_api/__init__.py
+++ b/src/llama-stack-api/llama_stack_api/__init__.py
@ -0,0 +1,871 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 """
 Llama Stack API Specifications
 This package contains the API definitions, data types, and protocol specifications
 for Llama Stack. It is designed to be a lightweight dependency for external providers
 and clients that need to interact with Llama Stack APIs without requiring the full
 server implementation.
 All imports from this package MUST use the form:
    from llama_stack_api import <symbol>
 Sub-module imports (e.g., from llama_stack_api.agents import Agents) are NOT supported
 and are considered a code smell. All exported symbols are explicitly listed in __all__.
 """
 __version__ = "0.4.0"
 # Import submodules for those who need them
 from . import common, strong_typing  # noqa: F401
 # Import all public API symbols
 from .agents import Agents, ResponseGuardrail, ResponseGuardrailSpec
 from .batches import Batches, BatchObject, ListBatchesResponse
 from .benchmarks import (
    Benchmark,
    BenchmarkInput,
    Benchmarks,
    CommonBenchmarkFields,
    ListBenchmarksResponse,
 )
 # Import commonly used types from common submodule
 from .common.content_types import (
    URL,
    ImageContentItem,
    InterleavedContent,
    InterleavedContentItem,
    TextContentItem,
    _URLOrData,
 )
 from .common.errors import (
    ConflictError,
    DatasetNotFoundError,
    InvalidConversationIdError,
    ModelNotFoundError,
    ModelTypeError,
    ResourceNotFoundError,
    TokenValidationError,
    ToolGroupNotFoundError,
    UnsupportedModelError,
    VectorStoreNotFoundError,
 )
 from .common.job_types import Job, JobStatus
 from .common.responses import Order, PaginatedResponse
 from .common.training_types import Checkpoint, PostTrainingMetric
 from .common.type_system import (
    ChatCompletionInputType,
    CompletionInputType,
    NumberType,
    ParamType,
    StringType,
 )
 from .conversations import (
    Conversation,
    ConversationDeletedResource,
    ConversationItem,
    ConversationItemCreateRequest,
    ConversationItemDeletedResource,
    ConversationItemInclude,
    ConversationItemList,
    ConversationMessage,
    Conversations,
    Metadata,
 )
 from .datasetio import DatasetIO, DatasetStore
 from .datasets import (
    CommonDatasetFields,
    Dataset,
    DatasetInput,
    DatasetPurpose,
    Datasets,
    DatasetType,
    DataSource,
    ListDatasetsResponse,
    RowsDataSource,
    URIDataSource,
 )
 from .datatypes import (
    Api,
    BenchmarksProtocolPrivate,
    DatasetsProtocolPrivate,
    DynamicApiMeta,
    Error,
    ExternalApiSpec,
    HealthResponse,
    HealthStatus,
    InlineProviderSpec,
    ModelsProtocolPrivate,
    ProviderSpec,
    RemoteProviderConfig,
    RemoteProviderSpec,
    RoutingTable,
    ScoringFunctionsProtocolPrivate,
    ShieldsProtocolPrivate,
    ToolGroupsProtocolPrivate,
    VectorStoresProtocolPrivate,
 )
 from .eval import BenchmarkConfig, Eval, EvalCandidate, EvaluateResponse, ModelCandidate
 from .files import (
    ExpiresAfter,
    Files,
    ListOpenAIFileResponse,
    OpenAIFileDeleteResponse,
    OpenAIFileObject,
    OpenAIFilePurpose,
 )
 from .inference import (
    Bf16QuantizationConfig,
    ChatCompletionResponseEventType,
    CompletionRequest,
    EmbeddingsResponse,
    EmbeddingTaskType,
    Fp8QuantizationConfig,
    GrammarResponseFormat,
    GreedySamplingStrategy,
    Inference,
    InferenceProvider,
    Int4QuantizationConfig,
    JsonSchemaResponseFormat,
    ListOpenAIChatCompletionResponse,
    LogProbConfig,
    ModelStore,
    OpenAIAssistantMessageParam,
    OpenAIChatCompletion,
    OpenAIChatCompletionChunk,
    OpenAIChatCompletionContentPartImageParam,
    OpenAIChatCompletionContentPartParam,
    OpenAIChatCompletionContentPartTextParam,
    OpenAIChatCompletionMessageContent,
    OpenAIChatCompletionRequestWithExtraBody,
    OpenAIChatCompletionTextOnlyMessageContent,
    OpenAIChatCompletionToolCall,
    OpenAIChatCompletionToolCallFunction,
    OpenAIChatCompletionUsage,
    OpenAIChatCompletionUsageCompletionTokensDetails,
    OpenAIChatCompletionUsagePromptTokensDetails,
    OpenAIChoice,
    OpenAIChoiceDelta,
    OpenAIChoiceLogprobs,
    OpenAIChunkChoice,
    OpenAICompletion,
    OpenAICompletionChoice,
    OpenAICompletionLogprobs,
    OpenAICompletionRequestWithExtraBody,
    OpenAICompletionWithInputMessages,
    OpenAIDeveloperMessageParam,
    OpenAIEmbeddingData,
    OpenAIEmbeddingsRequestWithExtraBody,
    OpenAIEmbeddingsResponse,
    OpenAIEmbeddingUsage,
    OpenAIFile,
    OpenAIFileFile,
    OpenAIImageURL,
    OpenAIJSONSchema,
    OpenAIMessageParam,
    OpenAIResponseFormatJSONObject,
    OpenAIResponseFormatJSONSchema,
    OpenAIResponseFormatParam,
    OpenAIResponseFormatText,
    OpenAISystemMessageParam,
    OpenAITokenLogProb,
    OpenAIToolMessageParam,
    OpenAITopLogProb,
    OpenAIUserMessageParam,
    QuantizationConfig,
    QuantizationType,
    RerankData,
    RerankResponse,
    ResponseFormat,
    ResponseFormatType,
    SamplingParams,
    SamplingStrategy,
    SystemMessage,
    SystemMessageBehavior,
    TextTruncation,
    TokenLogProbs,
    ToolChoice,
    ToolResponseMessage,
    TopKSamplingStrategy,
    TopPSamplingStrategy,
    UserMessage,
 )
 from .inspect import (
    ApiFilter,
    HealthInfo,
    Inspect,
    ListRoutesResponse,
    RouteInfo,
    VersionInfo,
 )
 from .models import (
    CommonModelFields,
    ListModelsResponse,
    Model,
    ModelInput,
    Models,
    ModelType,
    OpenAIListModelsResponse,
    OpenAIModel,
 )
 from .openai_responses import (
    AllowedToolsFilter,
    ApprovalFilter,
    ListOpenAIResponseInputItem,
    ListOpenAIResponseObject,
    MCPListToolsTool,
    OpenAIDeleteResponseObject,
    OpenAIResponseAnnotationCitation,
    OpenAIResponseAnnotationContainerFileCitation,
    OpenAIResponseAnnotationFileCitation,
    OpenAIResponseAnnotationFilePath,
    OpenAIResponseAnnotations,
    OpenAIResponseContentPart,
    OpenAIResponseContentPartOutputText,
    OpenAIResponseContentPartReasoningSummary,
    OpenAIResponseContentPartReasoningText,
    OpenAIResponseContentPartRefusal,
    OpenAIResponseError,
    OpenAIResponseInput,
    OpenAIResponseInputFunctionToolCallOutput,
    OpenAIResponseInputMessageContent,
    OpenAIResponseInputMessageContentFile,
    OpenAIResponseInputMessageContentImage,
    OpenAIResponseInputMessageContentText,
    OpenAIResponseInputTool,
    OpenAIResponseInputToolFileSearch,
    OpenAIResponseInputToolFunction,
    OpenAIResponseInputToolMCP,
    OpenAIResponseInputToolWebSearch,
    OpenAIResponseMCPApprovalRequest,
    OpenAIResponseMCPApprovalResponse,
    OpenAIResponseMessage,
    OpenAIResponseObject,
    OpenAIResponseObjectStream,
    OpenAIResponseObjectStreamResponseCompleted,
    OpenAIResponseObjectStreamResponseContentPartAdded,
    OpenAIResponseObjectStreamResponseContentPartDone,
    OpenAIResponseObjectStreamResponseCreated,
    OpenAIResponseObjectStreamResponseFailed,
    OpenAIResponseObjectStreamResponseFileSearchCallCompleted,
    OpenAIResponseObjectStreamResponseFileSearchCallInProgress,
    OpenAIResponseObjectStreamResponseFileSearchCallSearching,
    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta,
    OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone,
    OpenAIResponseObjectStreamResponseIncomplete,
    OpenAIResponseObjectStreamResponseInProgress,
    OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta,
    OpenAIResponseObjectStreamResponseMcpCallArgumentsDone,
    OpenAIResponseObjectStreamResponseMcpCallCompleted,
    OpenAIResponseObjectStreamResponseMcpCallFailed,
    OpenAIResponseObjectStreamResponseMcpCallInProgress,
    OpenAIResponseObjectStreamResponseMcpListToolsCompleted,
    OpenAIResponseObjectStreamResponseMcpListToolsFailed,
    OpenAIResponseObjectStreamResponseMcpListToolsInProgress,
    OpenAIResponseObjectStreamResponseOutputItemAdded,
    OpenAIResponseObjectStreamResponseOutputItemDone,
    OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded,
    OpenAIResponseObjectStreamResponseOutputTextDelta,
    OpenAIResponseObjectStreamResponseOutputTextDone,
    OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded,
    OpenAIResponseObjectStreamResponseReasoningSummaryPartDone,
    OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta,
    OpenAIResponseObjectStreamResponseReasoningSummaryTextDone,
    OpenAIResponseObjectStreamResponseReasoningTextDelta,
    OpenAIResponseObjectStreamResponseReasoningTextDone,
    OpenAIResponseObjectStreamResponseRefusalDelta,
    OpenAIResponseObjectStreamResponseRefusalDone,
    OpenAIResponseObjectStreamResponseWebSearchCallCompleted,
    OpenAIResponseObjectStreamResponseWebSearchCallInProgress,
    OpenAIResponseObjectStreamResponseWebSearchCallSearching,
    OpenAIResponseObjectWithInput,
    OpenAIResponseOutput,
    OpenAIResponseOutputMessageContent,
    OpenAIResponseOutputMessageContentOutputText,
    OpenAIResponseOutputMessageFileSearchToolCall,
    OpenAIResponseOutputMessageFileSearchToolCallResults,
    OpenAIResponseOutputMessageFunctionToolCall,
    OpenAIResponseOutputMessageMCPCall,
    OpenAIResponseOutputMessageMCPListTools,
    OpenAIResponseOutputMessageWebSearchToolCall,
    OpenAIResponsePrompt,
    OpenAIResponseText,
    OpenAIResponseTextFormat,
    OpenAIResponseTool,
    OpenAIResponseToolMCP,
    OpenAIResponseUsage,
    OpenAIResponseUsageInputTokensDetails,
    OpenAIResponseUsageOutputTokensDetails,
    WebSearchToolTypes,
 )
 from .post_training import (
    AlgorithmConfig,
    DataConfig,
    DatasetFormat,
    DPOAlignmentConfig,
    DPOLossType,
    EfficiencyConfig,
    ListPostTrainingJobsResponse,
    LoraFinetuningConfig,
    OptimizerConfig,
    OptimizerType,
    PostTraining,
    PostTrainingJob,
    PostTrainingJobArtifactsResponse,
    PostTrainingJobLogStream,
    PostTrainingJobStatusResponse,
    PostTrainingRLHFRequest,
    QATFinetuningConfig,
    RLHFAlgorithm,
    TrainingConfig,
 )
 from .prompts import ListPromptsResponse, Prompt, Prompts
 from .providers import ListProvidersResponse, ProviderInfo, Providers
 from .rag_tool import (
    DefaultRAGQueryGeneratorConfig,
    LLMRAGQueryGeneratorConfig,
    RAGDocument,
    RAGQueryConfig,
    RAGQueryGenerator,
    RAGQueryGeneratorConfig,
    RAGQueryResult,
    RAGSearchMode,
    Ranker,
    RRFRanker,
    WeightedRanker,
 )
 from .resource import Resource, ResourceType
 from .safety import (
    ModerationObject,
    ModerationObjectResults,
    RunShieldResponse,
    Safety,
    SafetyViolation,
    ShieldStore,
    ViolationLevel,
 )
 from .schema_utils import (
    CallableT,
    ExtraBodyField,
    WebMethod,
    json_schema_type,
    register_schema,
    webmethod,
 )
 from .scoring import (
    ScoreBatchResponse,
    ScoreResponse,
    Scoring,
    ScoringFunctionStore,
    ScoringResult,
    ScoringResultRow,
 )
 from .scoring_functions import (
    AggregationFunctionType,
    BasicScoringFnParams,
    CommonScoringFnFields,
    ListScoringFunctionsResponse,
    LLMAsJudgeScoringFnParams,
    RegexParserScoringFnParams,
    ScoringFn,
    ScoringFnInput,
    ScoringFnParams,
    ScoringFnParamsType,
    ScoringFunctions,
 )
 from .shields import (
    CommonShieldFields,
    ListShieldsResponse,
    Shield,
    ShieldInput,
    Shields,
 )
 # Import from strong_typing
 from .strong_typing.core import JsonType
 from .strong_typing.docstring import Docstring, parse_type
 from .strong_typing.inspection import (
    get_signature,
    is_generic_list,
    is_type_optional,
    is_type_union,
    is_unwrapped_body_param,
    unwrap_generic_list,
    unwrap_optional_type,
    unwrap_union_types,
 )
 from .strong_typing.name import python_type_to_name
 from .strong_typing.schema import (
    JsonSchemaGenerator,
    Schema,
    SchemaOptions,
    StrictJsonType,
    get_schema_identifier,
 )
 from .strong_typing.serialization import json_dump_string, object_to_json
 from .tools import (
    ListToolDefsResponse,
    ListToolGroupsResponse,
    SpecialToolGroup,
    ToolDef,
    ToolGroup,
    ToolGroupInput,
    ToolGroups,
    ToolInvocationResult,
    ToolRuntime,
    ToolStore,
 )
 from .vector_io import (
    Chunk,
    ChunkMetadata,
    OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
    OpenAICreateVectorStoreRequestWithExtraBody,
    QueryChunksResponse,
    SearchRankingOptions,
    VectorIO,
    VectorStoreChunkingStrategy,
    VectorStoreChunkingStrategyAuto,
    VectorStoreChunkingStrategyStatic,
    VectorStoreChunkingStrategyStaticConfig,
    VectorStoreContent,
    VectorStoreCreateRequest,
    VectorStoreDeleteResponse,
    VectorStoreFileBatchObject,
    VectorStoreFileContentResponse,
    VectorStoreFileCounts,
    VectorStoreFileDeleteResponse,
    VectorStoreFileLastError,
    VectorStoreFileObject,
    VectorStoreFilesListInBatchResponse,
    VectorStoreFileStatus,
    VectorStoreListFilesResponse,
    VectorStoreListResponse,
    VectorStoreModifyRequest,
    VectorStoreObject,
    VectorStoreSearchRequest,
    VectorStoreSearchResponse,
    VectorStoreSearchResponsePage,
    VectorStoreTable,
 )
 from .vector_stores import VectorStore, VectorStoreInput
 from .version import (
    LLAMA_STACK_API_V1,
    LLAMA_STACK_API_V1ALPHA,
    LLAMA_STACK_API_V1BETA,
 )
 __all__ = [
    # Submodules
    "common",
    "strong_typing",
    # Version constants
    "LLAMA_STACK_API_V1",
    "LLAMA_STACK_API_V1ALPHA",
    "LLAMA_STACK_API_V1BETA",
    # API Symbols
    "Agents",
    "AggregationFunctionType",
    "AlgorithmConfig",
    "AllowedToolsFilter",
    "Api",
    "ApiFilter",
    "ApprovalFilter",
    "BasicScoringFnParams",
    "Batches",
    "BatchObject",
    "Benchmark",
    "BenchmarkConfig",
    "BenchmarkInput",
    "Benchmarks",
    "BenchmarksProtocolPrivate",
    "Bf16QuantizationConfig",
    "CallableT",
    "ChatCompletionInputType",
    "ChatCompletionResponseEventType",
    "Checkpoint",
    "Chunk",
    "ChunkMetadata",
    "CommonBenchmarkFields",
    "ConflictError",
    "CommonDatasetFields",
    "CommonModelFields",
    "CommonScoringFnFields",
    "CommonShieldFields",
    "CompletionInputType",
    "CompletionRequest",
    "Conversation",
    "ConversationDeletedResource",
    "ConversationItem",
    "ConversationItemCreateRequest",
    "ConversationItemDeletedResource",
    "ConversationItemInclude",
    "ConversationItemList",
    "ConversationMessage",
    "Conversations",
    "DPOAlignmentConfig",
    "DPOLossType",
    "DataConfig",
    "DataSource",
    "Dataset",
    "DatasetFormat",
    "DatasetIO",
    "DatasetInput",
    "DatasetPurpose",
    "DatasetNotFoundError",
    "DatasetStore",
    "DatasetType",
    "Datasets",
    "DatasetsProtocolPrivate",
    "DefaultRAGQueryGeneratorConfig",
    "Docstring",
    "DynamicApiMeta",
    "EfficiencyConfig",
    "EmbeddingTaskType",
    "EmbeddingsResponse",
    "Error",
    "Eval",
    "EvalCandidate",
    "EvaluateResponse",
    "ExpiresAfter",
    "ExternalApiSpec",
    "ExtraBodyField",
    "Files",
    "Fp8QuantizationConfig",
    "get_schema_identifier",
    "get_signature",
    "GrammarResponseFormat",
    "GreedySamplingStrategy",
    "HealthInfo",
    "HealthResponse",
    "HealthStatus",
    "ImageContentItem",
    "Inference",
    "InferenceProvider",
    "InlineProviderSpec",
    "Inspect",
    "Int4QuantizationConfig",
    "InterleavedContent",
    "InterleavedContentItem",
    "InvalidConversationIdError",
    "is_generic_list",
    "is_type_optional",
    "is_type_union",
    "is_unwrapped_body_param",
    "Job",
    "JobStatus",
    "json_dump_string",
    "json_schema_type",
    "JsonSchemaGenerator",
    "JsonSchemaResponseFormat",
    "JsonType",
    "LLMAsJudgeScoringFnParams",
    "LLMRAGQueryGeneratorConfig",
    "ListBatchesResponse",
    "ListBenchmarksResponse",
    "ListDatasetsResponse",
    "ListModelsResponse",
    "ListOpenAIChatCompletionResponse",
    "ListOpenAIFileResponse",
    "ListOpenAIResponseInputItem",
    "ListOpenAIResponseObject",
    "ListPostTrainingJobsResponse",
    "ListPromptsResponse",
    "ListProvidersResponse",
    "ListRoutesResponse",
    "ListScoringFunctionsResponse",
    "ListShieldsResponse",
    "ListToolDefsResponse",
    "ListToolGroupsResponse",
    "LogProbConfig",
    "LoraFinetuningConfig",
    "MCPListToolsTool",
    "Metadata",
    "Model",
    "ModelCandidate",
    "ModelInput",
    "ModelNotFoundError",
    "ModelStore",
    "ModelType",
    "ModelTypeError",
    "Models",
    "ModelsProtocolPrivate",
    "ModerationObject",
    "ModerationObjectResults",
    "NumberType",
    "object_to_json",
    "OpenAIAssistantMessageParam",
    "OpenAIChatCompletion",
    "OpenAIChatCompletionChunk",
    "OpenAIChatCompletionContentPartImageParam",
    "OpenAIChatCompletionContentPartParam",
    "OpenAIChatCompletionContentPartTextParam",
    "OpenAIChatCompletionMessageContent",
    "OpenAIChatCompletionRequestWithExtraBody",
    "OpenAIChatCompletionTextOnlyMessageContent",
    "OpenAIChatCompletionToolCall",
    "OpenAIChatCompletionToolCallFunction",
    "OpenAIChatCompletionUsage",
    "OpenAIChatCompletionUsageCompletionTokensDetails",
    "OpenAIChatCompletionUsagePromptTokensDetails",
    "OpenAIChoice",
    "OpenAIChoiceDelta",
    "OpenAIChoiceLogprobs",
    "OpenAIChunkChoice",
    "OpenAICompletion",
    "OpenAICompletionChoice",
    "OpenAICompletionLogprobs",
    "OpenAICompletionRequestWithExtraBody",
    "OpenAICompletionWithInputMessages",
    "OpenAICreateVectorStoreFileBatchRequestWithExtraBody",
    "OpenAICreateVectorStoreRequestWithExtraBody",
    "OpenAIDeleteResponseObject",
    "OpenAIDeveloperMessageParam",
    "OpenAIEmbeddingData",
    "OpenAIEmbeddingUsage",
    "OpenAIEmbeddingsRequestWithExtraBody",
    "OpenAIEmbeddingsResponse",
    "OpenAIFile",
    "OpenAIFileDeleteResponse",
    "OpenAIFileFile",
    "OpenAIFileObject",
    "OpenAIFilePurpose",
    "OpenAIImageURL",
    "OpenAIJSONSchema",
    "OpenAIListModelsResponse",
    "OpenAIMessageParam",
    "OpenAIModel",
    "Order",
    "OpenAIResponseAnnotationCitation",
    "OpenAIResponseAnnotationContainerFileCitation",
    "OpenAIResponseAnnotationFileCitation",
    "OpenAIResponseAnnotationFilePath",
    "OpenAIResponseAnnotations",
    "OpenAIResponseContentPart",
    "OpenAIResponseContentPartOutputText",
    "OpenAIResponseContentPartReasoningSummary",
    "OpenAIResponseContentPartReasoningText",
    "OpenAIResponseContentPartRefusal",
    "OpenAIResponseError",
    "OpenAIResponseFormatJSONObject",
    "OpenAIResponseFormatJSONSchema",
    "OpenAIResponseFormatParam",
    "OpenAIResponseFormatText",
    "OpenAIResponseInput",
    "OpenAIResponseInputFunctionToolCallOutput",
    "OpenAIResponseInputMessageContent",
    "OpenAIResponseInputMessageContentFile",
    "OpenAIResponseInputMessageContentImage",
    "OpenAIResponseInputMessageContentText",
    "OpenAIResponseInputTool",
    "OpenAIResponseInputToolFileSearch",
    "OpenAIResponseInputToolFunction",
    "OpenAIResponseInputToolMCP",
    "OpenAIResponseInputToolWebSearch",
    "OpenAIResponseMCPApprovalRequest",
    "OpenAIResponseMCPApprovalResponse",
    "OpenAIResponseMessage",
    "OpenAIResponseObject",
    "OpenAIResponseObjectStream",
    "OpenAIResponseObjectStreamResponseCompleted",
    "OpenAIResponseObjectStreamResponseContentPartAdded",
    "OpenAIResponseObjectStreamResponseContentPartDone",
    "OpenAIResponseObjectStreamResponseCreated",
    "OpenAIResponseObjectStreamResponseFailed",
    "OpenAIResponseObjectStreamResponseFileSearchCallCompleted",
    "OpenAIResponseObjectStreamResponseFileSearchCallInProgress",
    "OpenAIResponseObjectStreamResponseFileSearchCallSearching",
    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta",
    "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone",
    "OpenAIResponseObjectStreamResponseInProgress",
    "OpenAIResponseObjectStreamResponseIncomplete",
    "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta",
    "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone",
    "OpenAIResponseObjectStreamResponseMcpCallCompleted",
    "OpenAIResponseObjectStreamResponseMcpCallFailed",
    "OpenAIResponseObjectStreamResponseMcpCallInProgress",
    "OpenAIResponseObjectStreamResponseMcpListToolsCompleted",
    "OpenAIResponseObjectStreamResponseMcpListToolsFailed",
    "OpenAIResponseObjectStreamResponseMcpListToolsInProgress",
    "OpenAIResponseObjectStreamResponseOutputItemAdded",
    "OpenAIResponseObjectStreamResponseOutputItemDone",
    "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded",
    "OpenAIResponseObjectStreamResponseOutputTextDelta",
    "OpenAIResponseObjectStreamResponseOutputTextDone",
    "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded",
    "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone",
    "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta",
    "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone",
    "OpenAIResponseObjectStreamResponseReasoningTextDelta",
    "OpenAIResponseObjectStreamResponseReasoningTextDone",
    "OpenAIResponseObjectStreamResponseRefusalDelta",
    "OpenAIResponseObjectStreamResponseRefusalDone",
    "OpenAIResponseObjectStreamResponseWebSearchCallCompleted",
    "OpenAIResponseObjectStreamResponseWebSearchCallInProgress",
    "OpenAIResponseObjectStreamResponseWebSearchCallSearching",
    "OpenAIResponseObjectWithInput",
    "OpenAIResponseOutput",
    "OpenAIResponseOutputMessageContent",
    "OpenAIResponseOutputMessageContentOutputText",
    "OpenAIResponseOutputMessageFileSearchToolCall",
    "OpenAIResponseOutputMessageFileSearchToolCallResults",
    "OpenAIResponseOutputMessageFunctionToolCall",
    "OpenAIResponseOutputMessageMCPCall",
    "OpenAIResponseOutputMessageMCPListTools",
    "OpenAIResponseOutputMessageWebSearchToolCall",
    "OpenAIResponsePrompt",
    "OpenAIResponseText",
    "OpenAIResponseTextFormat",
    "OpenAIResponseTool",
    "OpenAIResponseToolMCP",
    "OpenAIResponseUsage",
    "OpenAIResponseUsageInputTokensDetails",
    "OpenAIResponseUsageOutputTokensDetails",
    "OpenAISystemMessageParam",
    "OpenAITokenLogProb",
    "OpenAIToolMessageParam",
    "OpenAITopLogProb",
    "OpenAIUserMessageParam",
    "OptimizerConfig",
    "OptimizerType",
    "PaginatedResponse",
    "ParamType",
    "parse_type",
    "PostTraining",
    "PostTrainingMetric",
    "PostTrainingJob",
    "PostTrainingJobArtifactsResponse",
    "PostTrainingJobLogStream",
    "PostTrainingJobStatusResponse",
    "PostTrainingRLHFRequest",
    "Prompt",
    "Prompts",
    "ProviderInfo",
    "ProviderSpec",
    "Providers",
    "python_type_to_name",
    "QATFinetuningConfig",
    "QuantizationConfig",
    "QuantizationType",
    "QueryChunksResponse",
    "RAGDocument",
    "RAGQueryConfig",
    "RAGQueryGenerator",
    "RAGQueryGeneratorConfig",
    "RAGQueryResult",
    "RAGSearchMode",
    "register_schema",
    "RLHFAlgorithm",
    "RRFRanker",
    "Ranker",
    "RegexParserScoringFnParams",
    "RemoteProviderConfig",
    "RemoteProviderSpec",
    "RerankData",
    "RerankResponse",
    "Resource",
    "ResourceNotFoundError",
    "ResourceType",
    "ResponseFormat",
    "ResponseFormatType",
    "ResponseGuardrail",
    "ResponseGuardrailSpec",
    "RouteInfo",
    "RoutingTable",
    "RowsDataSource",
    "RunShieldResponse",
    "Safety",
    "SafetyViolation",
    "SamplingParams",
    "SamplingStrategy",
    "ScoreBatchResponse",
    "ScoreResponse",
    "Scoring",
    "ScoringFn",
    "ScoringFnInput",
    "ScoringFnParams",
    "ScoringFnParamsType",
    "ScoringFunctionStore",
    "ScoringFunctions",
    "ScoringFunctionsProtocolPrivate",
    "ScoringResult",
    "ScoringResultRow",
    "Schema",
    "SchemaOptions",
    "SearchRankingOptions",
    "Shield",
    "ShieldInput",
    "ShieldStore",
    "Shields",
    "ShieldsProtocolPrivate",
    "SpecialToolGroup",
    "StrictJsonType",
    "StringType",
    "SystemMessage",
    "SystemMessageBehavior",
    "TextContentItem",
    "TextTruncation",
    "TokenLogProbs",
    "TokenValidationError",
    "ToolChoice",
    "ToolGroupNotFoundError",
    "ToolDef",
    "ToolGroup",
    "ToolGroupInput",
    "ToolGroups",
    "ToolGroupsProtocolPrivate",
    "ToolInvocationResult",
    "ToolResponseMessage",
    "ToolRuntime",
    "ToolStore",
    "TopKSamplingStrategy",
    "TopPSamplingStrategy",
    "TrainingConfig",
    "UnsupportedModelError",
    "unwrap_generic_list",
    "unwrap_optional_type",
    "unwrap_union_types",
    "URIDataSource",
    "URL",
    "_URLOrData",
    "UserMessage",
    "VectorIO",
    "VectorStore",
    "VectorStoreChunkingStrategy",
    "VectorStoreChunkingStrategyAuto",
    "VectorStoreChunkingStrategyStatic",
    "VectorStoreChunkingStrategyStaticConfig",
    "VectorStoreContent",
    "VectorStoreCreateRequest",
    "VectorStoreDeleteResponse",
    "VectorStoreFileBatchObject",
    "VectorStoreFileContentResponse",
    "VectorStoreFileCounts",
    "VectorStoreFileDeleteResponse",
    "VectorStoreFileLastError",
    "VectorStoreFileObject",
    "VectorStoreFileStatus",
    "VectorStoreFilesListInBatchResponse",
    "VectorStoreInput",
    "VectorStoreListFilesResponse",
    "VectorStoreListResponse",
    "VectorStoreModifyRequest",
    "VectorStoreObject",
    "VectorStoreSearchRequest",
    "VectorStoreSearchResponse",
    "VectorStoreSearchResponsePage",
    "VectorStoreTable",
    "VectorStoreNotFoundError",
    "VectorStoresProtocolPrivate",
    "VersionInfo",
    "ViolationLevel",
    "webmethod",
    "WebMethod",
    "WebSearchToolTypes",
    "WeightedRanker",
 ]
--- a/src/llama-stack-api/llama_stack_api/agents.py
+++ b/src/llama-stack-api/llama_stack_api/agents.py
@ -9,9 +9,9 @@ from typing import Annotated, Protocol, runtime_checkable
 from pydantic import BaseModel
-from llama_stack.apis.common.responses import Order
+from llama_stack_api.common.responses import Order
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import ExtraBodyField, json_schema_type, webmethod
-from llama_stack.schema_utils import ExtraBodyField, json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
 from .openai_responses import (
    ListOpenAIResponseInputItem,
--- a/src/llama-stack-api/llama_stack_api/batches.py
+++ b/src/llama-stack-api/llama_stack_api/batches.py
@ -8,8 +8,8 @@ from typing import Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
 try:
    from openai.types import Batch as BatchObject
--- a/src/llama-stack-api/llama_stack_api/benchmarks.py
+++ b/src/llama-stack-api/llama_stack_api/benchmarks.py
@ -7,9 +7,9 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
+from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
 class CommonBenchmarkFields(BaseModel):
--- a/src/llama-stack-api/llama_stack_api/common/__init__.py
+++ b/src/llama-stack-api/llama_stack_api/common/__init__.py
--- a/src/llama-stack-api/llama_stack_api/common/content_types.py
+++ b/src/llama-stack-api/llama_stack_api/common/content_types.py
@ -4,13 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from enum import Enum
 from typing import Annotated, Literal
 from pydantic import BaseModel, Field, model_validator
-from llama_stack.models.llama.datatypes import ToolCall
+from llama_stack_api.schema_utils import json_schema_type, register_schema
 from llama_stack.schema_utils import json_schema_type, register_schema
@json_schema_type
@ -101,43 +99,3 @@ class ImageDelta(BaseModel):
    type: Literal["image"] = "image"
    image: bytes
 class ToolCallParseStatus(Enum):
    """Status of tool call parsing during streaming.
    Carried in the ``parse_status`` field of ``ToolCallDelta``.
    :cvar started: Tool call parsing has begun
    :cvar in_progress: Tool call parsing is ongoing
    :cvar failed: Tool call parsing failed
    :cvar succeeded: Tool call parsing completed successfully
    """
    # Each member's value is the same string as its name.
    started = "started"
    in_progress = "in_progress"
    failed = "failed"
    succeeded = "succeeded"
@json_schema_type
 class ToolCallDelta(BaseModel):
    """A tool call content delta for streaming responses.
    :param type: Discriminator type of the delta. Always "tool_call"
    :param tool_call: Either an in-progress tool call string or the final parsed tool call
    :param parse_status: Current parsing status of the tool call
    """
    # Fixed literal that also serves as the default value.
    type: Literal["tool_call"] = "tool_call"
    # Two shapes are accepted here: a plain string while the tool call is still
    # being streamed (so the client can show e.g. a long code generation as it
    # arrives), or the final parsed ToolCall sent at the end of the stream.
    tool_call: str | ToolCall
    parse_status: ToolCallParseStatus
 # Streaming completions send a stream of ContentDeltas.
 # ContentDelta is a union of the text, image and tool-call delta models; the
 # `type` field of each model is used as the discriminator.
 ContentDelta = Annotated[
    TextDelta | ImageDelta | ToolCallDelta,
    Field(discriminator="type"),
 ]
 # Register the union with the schema registry under the name "ContentDelta".
 register_schema(ContentDelta, name="ContentDelta")
--- a/src/llama-stack-api/llama_stack_api/common/errors.py
+++ b/src/llama-stack-api/llama_stack_api/common/errors.py
--- a/src/llama-stack-api/llama_stack_api/common/job_types.py
+++ b/src/llama-stack-api/llama_stack_api/common/job_types.py
@ -7,7 +7,7 @@ from enum import Enum
 from pydantic import BaseModel
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.schema_utils import json_schema_type
 class JobStatus(Enum):
--- a/src/llama-stack-api/llama_stack_api/common/responses.py
+++ b/src/llama-stack-api/llama_stack_api/common/responses.py
@ -9,7 +9,7 @@ from typing import Any
 from pydantic import BaseModel
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.schema_utils import json_schema_type
 class Order(Enum):
--- a/src/llama-stack-api/llama_stack_api/common/tracing.py
+++ b/src/llama-stack-api/llama_stack_api/common/tracing.py
--- a/src/llama-stack-api/llama_stack_api/common/training_types.py
+++ b/src/llama-stack-api/llama_stack_api/common/training_types.py
@ -8,7 +8,7 @@ from datetime import datetime
 from pydantic import BaseModel
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.schema_utils import json_schema_type
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/common/type_system.py
+++ b/src/llama-stack-api/llama_stack_api/common/type_system.py
@ -8,7 +8,7 @@ from typing import Annotated, Literal
 from pydantic import BaseModel, Field
-from llama_stack.schema_utils import json_schema_type, register_schema
+from llama_stack_api.schema_utils import json_schema_type, register_schema
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/conversations.py
+++ b/src/llama-stack-api/llama_stack_api/conversations.py
@ -9,7 +9,8 @@ from typing import Annotated, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
-from llama_stack.apis.agents.openai_responses import (
+from llama_stack_api.common.tracing import telemetry_traceable
 from llama_stack_api.openai_responses import (
    OpenAIResponseInputFunctionToolCallOutput,
    OpenAIResponseMCPApprovalRequest,
    OpenAIResponseMCPApprovalResponse,
@ -20,9 +21,8 @@ from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseOutputMessageMCPListTools,
    OpenAIResponseOutputMessageWebSearchToolCall,
 )
-from llama_stack.apis.common.tracing import telemetry_traceable
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 Metadata = dict[str, str]
--- a/src/llama-stack-api/llama_stack_api/datasetio.py
+++ b/src/llama-stack-api/llama_stack_api/datasetio.py
@ -6,10 +6,10 @@
 from typing import Any, Protocol, runtime_checkable
-from llama_stack.apis.common.responses import PaginatedResponse
+from llama_stack_api.common.responses import PaginatedResponse
-from llama_stack.apis.datasets import Dataset
+from llama_stack_api.datasets import Dataset
-from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
+from llama_stack_api.schema_utils import webmethod
-from llama_stack.schema_utils import webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1BETA
 class DatasetStore(Protocol):
--- a/src/llama-stack-api/llama_stack_api/datasets.py
+++ b/src/llama-stack-api/llama_stack_api/datasets.py
@ -9,9 +9,9 @@ from typing import Annotated, Any, Literal, Protocol
 from pydantic import BaseModel, Field
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1BETA
 class DatasetPurpose(StrEnum):
--- a/src/llama-stack-api/llama_stack_api/datatypes.py
+++ b/src/llama-stack-api/llama_stack_api/datatypes.py
@ -4,21 +4,172 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from enum import StrEnum
+from enum import Enum, EnumMeta, StrEnum
 from typing import Any, Protocol
 from urllib.parse import urlparse
 from pydantic import BaseModel, Field
-from llama_stack.apis.benchmarks import Benchmark
+from llama_stack_api.benchmarks import Benchmark
-from llama_stack.apis.datasets import Dataset
+from llama_stack_api.datasets import Dataset
-from llama_stack.apis.datatypes import Api
+from llama_stack_api.models import Model
-from llama_stack.apis.models import Model
+from llama_stack_api.schema_utils import json_schema_type
-from llama_stack.apis.scoring_functions import ScoringFn
+from llama_stack_api.scoring_functions import ScoringFn
-from llama_stack.apis.shields import Shield
+from llama_stack_api.shields import Shield
-from llama_stack.apis.tools import ToolGroup
+from llama_stack_api.tools import ToolGroup
-from llama_stack.apis.vector_stores import VectorStore
+from llama_stack_api.vector_stores import VectorStore
-from llama_stack.schema_utils import json_schema_type
+
 class DynamicApiMeta(EnumMeta):
    """Enum metaclass whose classes can gain extra members at runtime via ``add()``.
    Value lookups (``cls(value)``) fall back to the dynamically added members and
    to a normalized member-name match before failing with ``ValueError``.
    """
    def __new__(mcs, name, bases, namespace):
        """Build the enum class and attach the bookkeeping dictionaries.
        :param name: Name of the enum class being created
        :param bases: Base classes of the enum class
        :param namespace: Class body namespace
        :returns: The new enum class with ``_original_values`` and ``_dynamic_values`` set
        """
        # Snapshot the public (non-underscore) entries of the class body before
        # EnumMeta processes the namespace.
        original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}
        enum_cls = super().__new__(mcs, name, bases, namespace)
        enum_cls._original_values = original_values
        # Members registered later through add(), keyed by their value.
        enum_cls._dynamic_values = {}
        return enum_cls
    def __call__(cls, value):
        """Return the member for ``value``.
        Resolution order: the regular enum lookup, then members registered
        through ``add()``, then a member whose name equals ``value`` lowercased
        with ``-`` replaced by ``_``.
        :param value: Value to resolve
        :returns: The matching enum member
        :raises ValueError: If nothing matches; new members must be registered with ``add()``
        """
        try:
            return super().__call__(value)
        except ValueError as e:
            # Only strings can name a dynamic member. Checking the type first keeps
            # non-string (or unhashable) input on the ValueError path instead of
            # failing with AttributeError/TypeError in the fallbacks below.
            if isinstance(value, str):
                # If this value was already dynamically added, return it
                if value in cls._dynamic_values:
                    return cls._dynamic_values[value]
                # Derive the member name the same way add() does
                member_name = value.lower().replace("-", "_")
                # If this member name already exists in the enum, return the existing member
                if member_name in cls._member_map_:
                    return cls._member_map_[member_name]
            # Never create members implicitly: raise so that users register new
            # APIs explicitly through Api.add()
            raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e
    def __iter__(cls):
        """Iterate over static and dynamically added members, each exactly once."""
        # add() appends dynamic members to _member_names_, so the base iterator
        # already yields them; remember what was emitted to avoid duplicates.
        emitted = set()
        for member in super().__iter__():
            emitted.add(member._name_)
            yield member
        for member in getattr(cls, "_dynamic_values", {}).values():
            if member._name_ not in emitted:
                yield member
    def add(cls, value):
        """
        Add a new API to the enum.
        Used to register external APIs.
        :param value: Value of the new member; its name is ``value`` lowercased with ``-`` replaced by ``_``
        :returns: The new member, or the existing member already registered under that name
        """
        member_name = value.lower().replace("-", "_")
        # If this member name already exists in the enum, return it
        if member_name in cls._member_map_:
            return cls._member_map_[member_name]
        # Create a new enum member without going through the enum constructor
        member = object.__new__(cls)
        member._name_ = member_name
        member._value_ = value
        # Add it to the enum class (this also makes it visible to len() and iteration)
        cls._member_map_[member_name] = member
        cls._member_names_.append(member_name)
        # NOTE(review): kept from the original implementation; the effect of
        # overriding _member_type_ here is not evident from this file - confirm.
        cls._member_type_ = str
        # Store it in our dynamic values so cls(value) can resolve it
        cls._dynamic_values[value] = member
        return member
@json_schema_type
 class Api(Enum, metaclass=DynamicApiMeta):
    """Enumeration of all available APIs in the Llama Stack system.
    :cvar providers: Provider management and configuration
    :cvar inference: Text generation, chat completions, and embeddings
    :cvar safety: Content moderation and safety shields
    :cvar agents: Agent orchestration and execution
    :cvar batches: Batch processing for asynchronous API requests
    :cvar vector_io: Vector database operations and queries
    :cvar datasetio: Dataset input/output operations
    :cvar scoring: Model output evaluation and scoring
    :cvar eval: Model evaluation and benchmarking framework
    :cvar post_training: Fine-tuning and model training
    :cvar tool_runtime: Tool execution and management
    :cvar models: Model metadata and management
    :cvar shields: Safety shield implementations
    :cvar vector_stores: Vector stores (only used for the routing table)
    :cvar datasets: Dataset creation and management
    :cvar scoring_functions: Scoring function definitions
    :cvar benchmarks: Benchmark suite management
    :cvar tool_groups: Tool group organization
    :cvar files: File storage and management
    :cvar prompts: Prompt versions and management
    :cvar conversations: Conversations API
    :cvar inspect: Built-in system inspection and introspection
    """
    # Each member's value is the same string as its name.
    providers = "providers"
    inference = "inference"
    safety = "safety"
    agents = "agents"
    batches = "batches"
    vector_io = "vector_io"
    datasetio = "datasetio"
    scoring = "scoring"
    eval = "eval"
    post_training = "post_training"
    tool_runtime = "tool_runtime"
    models = "models"
    shields = "shields"
    vector_stores = "vector_stores"  # only used for routing table
    datasets = "datasets"
    scoring_functions = "scoring_functions"
    benchmarks = "benchmarks"
    tool_groups = "tool_groups"
    files = "files"
    prompts = "prompts"
    conversations = "conversations"
    # built-in API
    inspect = "inspect"
@json_schema_type
 class Error(BaseModel):
    """
    Error response from the API. Roughly follows RFC 7807 (Problem Details for HTTP APIs).
    :param status: HTTP status code
    :param title: Error title, a short summary of the error which is invariant for an error type
    :param detail: Error detail, a longer human-readable description of the error
    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
    """
    status: int
    title: str
    detail: str
    # Optional; defaults to None when no occurrence-specific URL is provided.
    instance: str | None = None
 class ExternalApiSpec(BaseModel):
    """Specification for an external API implementation.
    :param module: Python module containing the API implementation
    :param name: Name of the API
    :param pip_packages: List of pip packages to install the API (defaults to an empty list)
    :param protocol: Name of the protocol class for the API
    """
    # `...` marks a field as required; only pip_packages has a default.
    module: str = Field(..., description="Python module containing the API implementation")
    name: str = Field(..., description="Name of the API")
    pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
    protocol: str = Field(..., description="Name of the protocol class for the API")
 # Provider-related types (merged from providers/datatypes.py)
 # NOTE: These imports are forward references to avoid circular dependencies
 # They will be resolved at runtime when the classes are used
 class ModelsProtocolPrivate(Protocol):
--- a/src/llama-stack-api/llama_stack_api/eval.py
+++ b/src/llama-stack-api/llama_stack_api/eval.py
@ -8,12 +8,12 @@ from typing import Any, Literal, Protocol
 from pydantic import BaseModel, Field
-from llama_stack.apis.common.job_types import Job
+from llama_stack_api.common.job_types import Job
-from llama_stack.apis.inference import SamplingParams, SystemMessage
+from llama_stack_api.inference import SamplingParams, SystemMessage
-from llama_stack.apis.scoring import ScoringResult
+from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api.scoring import ScoringResult
-from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
+from llama_stack_api.scoring_functions import ScoringFnParams
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/files.py
+++ b/src/llama-stack-api/llama_stack_api/files.py
@ -10,10 +10,10 @@ from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable
 from fastapi import File, Form, Response, UploadFile
 from pydantic import BaseModel, Field
-from llama_stack.apis.common.responses import Order
+from llama_stack_api.common.responses import Order
-from llama_stack.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
 # OpenAI Files API Models
--- a/src/llama-stack-api/llama_stack_api/inference.py
+++ b/src/llama-stack-api/llama_stack_api/inference.py
@ -18,14 +18,14 @@ from fastapi import Body
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict
-from llama_stack.apis.common.content_types import InterleavedContent
+from llama_stack_api.common.content_types import InterleavedContent
-from llama_stack.apis.common.responses import (
+from llama_stack_api.common.responses import (
    Order,
 )
-from llama_stack.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack.apis.models import Model
+from llama_stack_api.models import Model
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/inspect.py
+++ b/src/llama-stack-api/llama_stack_api/inspect.py
@ -8,11 +8,11 @@ from typing import Literal, Protocol, runtime_checkable
 from pydantic import BaseModel
-from llama_stack.apis.version import (
+from llama_stack_api.datatypes import HealthStatus
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import (
    LLAMA_STACK_API_V1,
 )
 from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.schema_utils import json_schema_type, webmethod
 # Valid values for the route filter parameter.
 # Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated)
--- a/src/llama-stack-api/llama_stack_api/models.py
+++ b/src/llama-stack-api/llama_stack_api/models.py
@ -9,10 +9,10 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, ConfigDict, Field, field_validator
-from llama_stack.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
 class CommonModelFields(BaseModel):
--- a/src/llama-stack-api/llama_stack_api/openai_responses.py
+++ b/src/llama-stack-api/llama_stack_api/openai_responses.py
@ -10,8 +10,8 @@ from typing import Annotated, Any, Literal
 from pydantic import BaseModel, Field, model_validator
 from typing_extensions import TypedDict
-from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
+from llama_stack_api.schema_utils import json_schema_type, register_schema
-from llama_stack.schema_utils import json_schema_type, register_schema
+from llama_stack_api.vector_io import SearchRankingOptions as FileSearchRankingOptions
 # NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably
 # take their YAML and generate this file automatically. Their YAML is available.
--- a/src/llama-stack-api/llama_stack_api/post_training.py
+++ b/src/llama-stack-api/llama_stack_api/post_training.py
@ -10,11 +10,11 @@ from typing import Annotated, Any, Literal, Protocol
 from pydantic import BaseModel, Field
-from llama_stack.apis.common.content_types import URL
+from llama_stack_api.common.content_types import URL
-from llama_stack.apis.common.job_types import JobStatus
+from llama_stack_api.common.job_types import JobStatus
-from llama_stack.apis.common.training_types import Checkpoint
+from llama_stack_api.common.training_types import Checkpoint
-from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/prompts.py
+++ b/src/llama-stack-api/llama_stack_api/prompts.py
@ -10,9 +10,9 @@ from typing import Protocol, runtime_checkable
 from pydantic import BaseModel, Field, field_validator, model_validator
-from llama_stack.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/providers.py
+++ b/src/llama-stack-api/llama_stack_api/providers.py
@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable
 from pydantic import BaseModel
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.datatypes import HealthResponse
-from llama_stack.providers.datatypes import HealthResponse
+from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/py.typed
+++ b/src/llama-stack-api/llama_stack_api/py.typed
--- a/src/llama-stack-api/llama_stack_api/rag_tool.py
+++ b/src/llama-stack-api/llama_stack_api/rag_tool.py
@ -9,7 +9,7 @@ from typing import Annotated, Any, Literal
 from pydantic import BaseModel, Field, field_validator
-from llama_stack.apis.common.content_types import URL, InterleavedContent
+from llama_stack_api.common.content_types import URL, InterleavedContent
 class RRFRanker(BaseModel):
--- a/src/llama-stack-api/llama_stack_api/resource.py
+++ b/src/llama-stack-api/llama_stack_api/resource.py
--- a/src/llama-stack-api/llama_stack_api/safety.py
+++ b/src/llama-stack-api/llama_stack_api/safety.py
@ -9,11 +9,11 @@ from typing import Any, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
-from llama_stack.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack.apis.inference import OpenAIMessageParam
+from llama_stack_api.inference import OpenAIMessageParam
-from llama_stack.apis.shields import Shield
+from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.shields import Shield
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/schema_utils.py
+++ b/src/llama-stack-api/llama_stack_api/schema_utils.py
--- a/src/llama-stack-api/llama_stack_api/scoring.py
+++ b/src/llama-stack-api/llama_stack_api/scoring.py
@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable
 from pydantic import BaseModel
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
+from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
 # mapping of metric to value
 ScoringResultRow = dict[str, Any]
--- a/src/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/src/llama_stack/apis/scoring_functions/scoring_functions.py
@ -16,10 +16,10 @@ from typing import (
 from pydantic import BaseModel, Field
-from llama_stack.apis.common.type_system import ParamType
+from llama_stack_api.common.type_system import ParamType
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
 # Perhaps more structure can be imposed on these functions. Maybe they could be associated
--- a/src/llama-stack-api/llama_stack_api/shields.py
+++ b/src/llama-stack-api/llama_stack_api/shields.py
@ -8,10 +8,10 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel
-from llama_stack.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
 class CommonShieldFields(BaseModel):
--- a/src/llama-stack-api/llama_stack_api/strong_typing/init.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/init.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/classdef.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/classdef.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/core.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/core.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/docstring.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/docstring.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/exception.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/exception.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/inspection.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/inspection.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/mapping.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/mapping.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/name.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/name.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/py.typed
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/py.typed
--- a/src/llama-stack-api/llama_stack_api/strong_typing/schema.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/schema.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/serialization.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/serialization.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/serializer.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/serializer.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/slots.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/slots.py
--- a/src/llama-stack-api/llama_stack_api/strong_typing/topological.py
+++ b/src/llama-stack-api/llama_stack_api/strong_typing/topological.py
--- a/src/llama-stack-api/llama_stack_api/tools.py
+++ b/src/llama-stack-api/llama_stack_api/tools.py
@ -10,11 +10,11 @@ from typing import Any, Literal, Protocol
 from pydantic import BaseModel
 from typing_extensions import runtime_checkable
-from llama_stack.apis.common.content_types import URL, InterleavedContent
+from llama_stack_api.common.content_types import URL, InterleavedContent
-from llama_stack.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/vector_io.py
+++ b/src/llama-stack-api/llama_stack_api/vector_io.py
@ -13,12 +13,12 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
 from fastapi import Body, Query
 from pydantic import BaseModel, Field
-from llama_stack.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack.apis.inference import InterleavedContent
+from llama_stack_api.inference import InterleavedContent
-from llama_stack.apis.vector_stores import VectorStore
+from llama_stack_api.schema_utils import json_schema_type, webmethod
-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.strong_typing.schema import register_schema
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.vector_stores import VectorStore
-from llama_stack.strong_typing.schema import register_schema
+from llama_stack_api.version import LLAMA_STACK_API_V1
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/vector_stores.py
+++ b/src/llama-stack-api/llama_stack_api/vector_stores.py
@ -8,7 +8,7 @@ from typing import Literal
 from pydantic import BaseModel
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
 # Internal resource type for storing the vector store routing and other information
--- a/src/llama-stack-api/llama_stack_api/version.py
+++ b/src/llama-stack-api/llama_stack_api/version.py
--- a/src/llama-stack-api/pyproject.toml
+++ b/src/llama-stack-api/pyproject.toml
@ -0,0 +1,82 @@
 [build-system]
 requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 [tool.uv]
 required-version = ">=0.7.0"
 [project]
 name = "llama-stack-api"
 version = "0.1.0"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
 description = "API and Provider specifications for Llama Stack - lightweight package with protocol definitions and provider specs"
 readme = "README.md"
 requires-python = ">=3.12"
 license = { "text" = "MIT" }
 classifiers = [
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Operating System :: OS Independent",
    "Intended Audience :: Developers",
    "Intended Audience :: Information Technology",
    "Intended Audience :: Science/Research",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Information Analysis",
 ]
 # Runtime dependencies installed together with the llama-stack-api package.
 # NOTE(review): llama_stack_api/vector_io.py imports `fastapi` (Body, Query), which is not
 # listed here — confirm whether it must be declared or is expected from the host install.
 dependencies = [
    "pydantic>=2.11.9",
    "jsonschema",
    "opentelemetry-sdk>=1.30.0",
    "opentelemetry-exporter-otlp-proto-http>=1.30.0",
 ]
 [project.urls]
 Homepage = "https://github.com/llamastack/llama-stack"
 [tool.setuptools.packages.find]
 where = ["."]
 include = ["llama_stack_api", "llama_stack_api.*"]
 [tool.setuptools.package-data]
 llama_stack_api = ["py.typed"]
 [tool.ruff]
 line-length = 120
 [tool.ruff.lint]
 select = [
    "UP",      # pyupgrade
    "B",       # flake8-bugbear
    "B9",      # flake8-bugbear subset
    "C",       # comprehensions
    "E",       # pycodestyle
    "F",       # Pyflakes
    "N",       # Naming
    "W",       # Warnings
    "DTZ",     # datetime rules
    "I",       # isort (imports order)
    "RUF001",  # Checks for ambiguous Unicode characters in strings
    "RUF002",  # Checks for ambiguous Unicode characters in docstrings
    "RUF003",  # Checks for ambiguous Unicode characters in comments
    "PLC2401", # Checks for the use of non-ASCII characters in variable names
 ]
 ignore = [
    # The following ignores are desired by the project maintainers.
    "E402",   # Module level import not at top of file
    "E501",   # Line too long
    "F405",   # Maybe undefined or defined from star import
    "C408",   # Ignored because we like the dict keyword argument syntax
    "N812",   # Ignored because import torch.nn.functional as F is PyTorch convention
    # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
    "C901",   # Complexity of the function is too high
 ]
 unfixable = [
    "PLE2515",
 ] # Do not fix this automatically since ruff will replace the zero-width space with \u200b - let's do it manually
 [tool.ruff.lint.per-file-ignores]
 # F403 (star imports) is allowed only in the __init__.py files matched below.
 # NOTE(review): the modules of this package appear to live directly under llama_stack_api/
 # (no "apis/" subdirectory is visible), so this glob may match nothing — confirm the path.
 "llama_stack_api/apis/**/__init__.py" = ["F403"]
 [tool.ruff.lint.pep8-naming]
 classmethod-decorators = ["classmethod", "pydantic.field_validator"]
--- a/src/llama_stack/apis/agents/init.py
+++ b/src/llama_stack/apis/agents/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .agents import *
--- a/src/llama_stack/apis/batches/init.py
+++ b/src/llama_stack/apis/batches/init.py
@ -1,9 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .batches import Batches, BatchObject, ListBatchesResponse
 __all__ = ["Batches", "BatchObject", "ListBatchesResponse"]
--- a/src/llama_stack/apis/benchmarks/init.py
+++ b/src/llama_stack/apis/benchmarks/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .benchmarks import *
--- a/src/llama_stack/apis/common/init.py
+++ b/src/llama_stack/apis/common/init.py
@ -1,5 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
--- a/src/llama_stack/apis/conversations/init.py
+++ b/src/llama_stack/apis/conversations/init.py
@ -1,27 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .conversations import (
    Conversation,
    ConversationDeletedResource,
    ConversationItem,
    ConversationItemCreateRequest,
    ConversationItemDeletedResource,
    ConversationItemList,
    Conversations,
    Metadata,
 )
 __all__ = [
    "Conversation",
    "ConversationDeletedResource",
    "ConversationItem",
    "ConversationItemCreateRequest",
    "ConversationItemDeletedResource",
    "ConversationItemList",
    "Conversations",
    "Metadata",
 ]
--- a/src/llama_stack/apis/datasetio/init.py
+++ b/src/llama_stack/apis/datasetio/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .datasetio import *
--- a/src/llama_stack/apis/datasets/init.py
+++ b/src/llama_stack/apis/datasets/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .datasets import *
--- a/src/llama_stack/apis/datatypes.py
+++ b/src/llama_stack/apis/datatypes.py
@ -1,158 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from enum import Enum, EnumMeta
 from pydantic import BaseModel, Field
 from llama_stack.schema_utils import json_schema_type
 class DynamicApiMeta(EnumMeta):
    """Enum metaclass that allows members to be registered after class creation.
    Members declared in the class body behave like regular enum members. Extra
    members are registered with ``add()`` and can afterwards be looked up by value
    and are included, exactly once, when iterating over the enum.
    """
    def __new__(cls, name, bases, namespace):
        """Create the enum class and attach the bookkeeping used for dynamic members."""
        # Store the original enum values
        original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}
        # Create the enum class
        cls = super().__new__(cls, name, bases, namespace)
        # Store the original values for reference
        cls._original_values = original_values
        # Initialize _dynamic_values (maps value -> member registered through add())
        cls._dynamic_values = {}
        return cls
    def __call__(cls, value):
        """Return the member for ``value``, including members registered via ``add()``.
        :param value: the value (or an existing member) to look up
        :returns: the matching enum member
        :raises ValueError: if ``value`` does not correspond to any member
        """
        try:
            return super().__call__(value)
        except ValueError as e:
            # Only strings can name an API. Re-raise the lookup error for anything else
            # instead of failing with AttributeError on value.lower() below.
            if not isinstance(value, str):
                raise
            # If this value was already dynamically added, return it
            if value in cls._dynamic_values:
                return cls._dynamic_values[value]
            # Derive the member name the same way add() does
            member_name = value.lower().replace("-", "_")
            # If this member name already exists in the enum, return the existing member
            if member_name in cls._member_map_:
                return cls._member_map_[member_name]
            # Unknown values are never created implicitly: raise ValueError to force users
            # to use Api.add() to register new APIs explicitly
            raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e
    def __iter__(cls):
        """Iterate over all members, static and dynamic, yielding each one exactly once."""
        yielded = set()
        for member in super().__iter__():
            yielded.add(member._name_)
            yield member
        # add() also appends the new name to _member_names_, so the base iterator may
        # already have produced the dynamic members; skip those to avoid duplicates.
        for member in getattr(cls, "_dynamic_values", {}).values():
            if member._name_ not in yielded:
                yield member
    def add(cls, value):
        """
        Add a new API to the enum.
        Used to register external APIs.
        :param value: the API value; the member name is ``value`` lower-cased with "-" replaced by "_"
        :returns: the newly created member, or the existing member that already has that name
        """
        member_name = value.lower().replace("-", "_")
        # If this member name already exists in the enum, return it
        if member_name in cls._member_map_:
            return cls._member_map_[member_name]
        # Create a new enum member
        member = object.__new__(cls)
        member._name_ = member_name
        member._value_ = value
        # Add it to the enum class
        cls._member_map_[member_name] = member
        cls._member_names_.append(member_name)
        # NOTE(review): overwrites the enum's _member_type_ on every registration; the
        # reason is not evident from this block - confirm it is still required.
        cls._member_type_ = str
        # Store it in our dynamic values
        cls._dynamic_values[value] = member
        return member
@json_schema_type
 class Api(Enum, metaclass=DynamicApiMeta):
    """Enumeration of all available APIs in the Llama Stack system.
    Additional (external) APIs can be registered at runtime through ``Api.add()``.
    :cvar providers: Provider management and configuration
    :cvar inference: Text generation, chat completions, and embeddings
    :cvar safety: Content moderation and safety shields
    :cvar agents: Agent orchestration and execution
    :cvar batches: Batch processing for asynchronous API requests
    :cvar vector_io: Vector database operations and queries
    :cvar datasetio: Dataset input/output operations
    :cvar scoring: Model output evaluation and scoring
    :cvar eval: Model evaluation and benchmarking framework
    :cvar post_training: Fine-tuning and model training
    :cvar tool_runtime: Tool execution and management
    :cvar models: Model metadata and management
    :cvar shields: Safety shield implementations
    :cvar vector_stores: Vector store resources (only used for the routing table)
    :cvar datasets: Dataset creation and management
    :cvar scoring_functions: Scoring function definitions
    :cvar benchmarks: Benchmark suite management
    :cvar tool_groups: Tool group organization
    :cvar files: File storage and management
    :cvar prompts: Prompt versions and management
    :cvar conversations: Conversations and conversation items
    :cvar inspect: Built-in system inspection and introspection
    """
    providers = "providers"
    inference = "inference"
    safety = "safety"
    agents = "agents"
    batches = "batches"
    vector_io = "vector_io"
    datasetio = "datasetio"
    scoring = "scoring"
    eval = "eval"
    post_training = "post_training"
    tool_runtime = "tool_runtime"
    models = "models"
    shields = "shields"
    vector_stores = "vector_stores"  # only used for routing table
    datasets = "datasets"
    scoring_functions = "scoring_functions"
    benchmarks = "benchmarks"
    tool_groups = "tool_groups"
    files = "files"
    prompts = "prompts"
    conversations = "conversations"
    # built-in API
    inspect = "inspect"
@json_schema_type
 class Error(BaseModel):
    """
    Error response from the API. Roughly follows RFC 7807.
    :param status: HTTP status code
    :param title: Error title, a short summary of the error which is invariant for an error type
    :param detail: Error detail, a longer human-readable description of the error
    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error; defaults to None when omitted
    """
    status: int
    title: str
    detail: str
    instance: str | None = None
 class ExternalApiSpec(BaseModel):
    """Specification for an external API implementation.
    ``module``, ``name`` and ``protocol`` are required; ``pip_packages`` defaults to an empty list.
    """
    module: str = Field(..., description="Python module containing the API implementation")
    name: str = Field(..., description="Name of the API")
    # NOTE(review): a mutable list literal is used as the default; pydantic is documented to
    # copy field defaults per instance, so this should not be shared state - confirm for the
    # pydantic version in use before relying on it.
    pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
    protocol: str = Field(..., description="Name of the protocol class for the API")
--- a/src/llama_stack/apis/eval/init.py
+++ b/src/llama_stack/apis/eval/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .eval import *
--- a/src/llama_stack/apis/files/init.py
+++ b/src/llama_stack/apis/files/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .files import *
--- a/src/llama_stack/apis/inference/init.py
+++ b/src/llama_stack/apis/inference/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .inference import *
--- a/src/llama_stack/apis/inspect/init.py
+++ b/src/llama_stack/apis/inspect/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .inspect import *
--- a/src/llama_stack/apis/models/init.py
+++ b/src/llama_stack/apis/models/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .models import *
--- a/src/llama_stack/apis/post_training/init.py
+++ b/src/llama_stack/apis/post_training/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .post_training import *
--- a/src/llama_stack/apis/prompts/init.py
+++ b/src/llama_stack/apis/prompts/init.py
@ -1,9 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .prompts import ListPromptsResponse, Prompt, Prompts
 __all__ = ["Prompt", "Prompts", "ListPromptsResponse"]
--- a/src/llama_stack/apis/providers/init.py
+++ b/src/llama_stack/apis/providers/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .providers import *
--- a/src/llama_stack/apis/safety/init.py
+++ b/src/llama_stack/apis/safety/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .safety import *
--- a/src/llama_stack/apis/scoring/init.py
+++ b/src/llama_stack/apis/scoring/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .scoring import *
--- a/src/llama_stack/apis/scoring_functions/init.py
+++ b/src/llama_stack/apis/scoring_functions/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .scoring_functions import *
--- a/src/llama_stack/apis/shields/init.py
+++ b/src/llama_stack/apis/shields/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .shields import *
--- a/src/llama_stack/apis/tools/init.py
+++ b/src/llama_stack/apis/tools/init.py
@ -1,8 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .rag_tool import *
 from .tools import *
--- a/src/llama_stack/apis/vector_io/init.py
+++ b/src/llama_stack/apis/vector_io/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .vector_io import *
--- a/src/llama_stack/apis/vector_stores/init.py
+++ b/src/llama_stack/apis/vector_stores/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .vector_stores import *
--- a/src/llama_stack/cli/stack/_list_deps.py
+++ b/src/llama_stack/cli/stack/_list_deps.py
@ -9,6 +9,7 @@ import sys
 from pathlib import Path
 import yaml
 from llama_stack_api import Api
 from termcolor import cprint
 from llama_stack.cli.stack.utils import ImageType
@ -21,7 +22,6 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.stack import replace_env_vars
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
--- a/src/llama_stack/cli/stack/utils.py
+++ b/src/llama_stack/cli/stack/utils.py
@ -11,6 +11,7 @@ from functools import lru_cache
 from pathlib import Path
 import yaml
 from llama_stack_api import Api
 from termcolor import cprint
 from llama_stack.core.datatypes import (
@ -32,7 +33,6 @@ from llama_stack.core.storage.datatypes import (
 from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
--- a/src/llama_stack/core/build.py
+++ b/src/llama_stack/core/build.py
@ -6,6 +6,7 @@
 import sys
 from llama_stack_api import Api
 from pydantic import BaseModel
 from termcolor import cprint
@ -13,7 +14,6 @@ from llama_stack.core.datatypes import BuildConfig
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.distributions.template import DistributionTemplate
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
 log = get_logger(name=__name__, category="core")
--- a/src/llama_stack/core/client.py
+++ b/src/llama_stack/core/client.py
@ -12,11 +12,10 @@ from enum import Enum
 from typing import Any, Union, get_args, get_origin
 import httpx
 from llama_stack_api import RemoteProviderConfig
 from pydantic import BaseModel, parse_obj_as
 from termcolor import cprint
 from llama_stack.providers.datatypes import RemoteProviderConfig
 _CLIENT_CLASSES = {}
--- a/src/llama_stack/core/configure.py
+++ b/src/llama_stack/core/configure.py
@ -6,6 +6,8 @@
 import textwrap
 from typing import Any
 from llama_stack_api import Api, ProviderSpec
 from llama_stack.core.datatypes import (
    LLAMA_STACK_RUN_CONFIG_VERSION,
    DistributionSpec,
@ -20,7 +22,6 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.prompt_for_config import prompt_for_config
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api, ProviderSpec
 logger = get_logger(name=__name__, category="core")
--- a/src/llama_stack/core/conversations/conversations.py
+++ b/src/llama_stack/core/conversations/conversations.py
@ -8,9 +8,7 @@ import secrets
 import time
 from typing import Any, Literal
-from pydantic import BaseModel, TypeAdapter
+from llama_stack_api import (
 from llama_stack.apis.conversations.conversations import (
    Conversation,
    ConversationDeletedResource,
    ConversationItem,
@ -20,6 +18,8 @@ from llama_stack.apis.conversations.conversations import (
    Conversations,
    Metadata,
 )
 from pydantic import BaseModel, TypeAdapter
 from llama_stack.core.datatypes import AccessRule, StackRunConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
--- a/src/llama_stack/core/datatypes.py
+++ b/src/llama_stack/core/datatypes.py
@ -9,22 +9,34 @@ from pathlib import Path
 from typing import Annotated, Any, Literal, Self
 from urllib.parse import urlparse
 from llama_stack_api import (
    Api,
    Benchmark,
    BenchmarkInput,
    Dataset,
    DatasetInput,
    DatasetIO,
    Eval,
    Inference,
    Model,
    ModelInput,
    ProviderSpec,
    Resource,
    Safety,
    Scoring,
    ScoringFn,
    ScoringFnInput,
    Shield,
    ShieldInput,
    ToolGroup,
    ToolGroupInput,
    ToolRuntime,
    VectorIO,
    VectorStore,
    VectorStoreInput,
 )
 from pydantic import BaseModel, Field, field_validator, model_validator
 from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Dataset, DatasetInput
 from llama_stack.apis.eval import Eval
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.models import Model, ModelInput
 from llama_stack.apis.resource import Resource
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.scoring import Scoring
 from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
 from llama_stack.apis.shields import Shield, ShieldInput
 from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
 from llama_stack.core.access_control.datatypes import AccessRule
 from llama_stack.core.storage.datatypes import (
    KVStoreReference,
@ -32,7 +44,6 @@ from llama_stack.core.storage.datatypes import (
    StorageConfig,
 )
 from llama_stack.log import LoggingConfig
 from llama_stack.providers.datatypes import Api, ProviderSpec
 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
 LLAMA_STACK_RUN_CONFIG_VERSION = 2
--- a/src/llama_stack/core/distribution.py
+++ b/src/llama_stack/core/distribution.py
@ -10,17 +10,17 @@ import os
 from typing import Any
 import yaml
-from pydantic import BaseModel
+from llama_stack_api import (
 from llama_stack.core.datatypes import BuildConfig, DistributionSpec
 from llama_stack.core.external import load_external_apis
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
    Api,
    InlineProviderSpec,
    ProviderSpec,
    RemoteProviderSpec,
 )
 from pydantic import BaseModel
 from llama_stack.core.datatypes import BuildConfig, DistributionSpec
 from llama_stack.core.external import load_external_apis
 from llama_stack.log import get_logger
 logger = get_logger(name=__name__, category="core")
--- a/Show more
+++ b/Show more