feat: split API and provider specs into separate llama-stack-api pkg

Extract API definitions, models, and provider specifications into a
standalone llama-stack-api package that can be published to PyPI
independently of the main llama-stack server.

Motivation

External providers currently import from llama-stack, which pulls in
the full server package, overrides the installed version, and causes
dependency conflicts. This separation allows external providers to:

- Install only the type definitions they need without server dependencies
- Avoid version conflicts with the installed llama-stack package
- Be versioned and released independently

This enables us to re-enable external provider module tests that were
previously blocked by these import conflicts.

Changes

- Created llama-stack-api package with minimal dependencies (pydantic, jsonschema)
- Moved APIs, providers datatypes, strong_typing, and schema_utils
- Updated all imports from llama_stack.* to llama_stack_api.* (see the example after this list)
- Preserved git history using git mv for moved files
- Configured local editable install for development workflow
- Updated linting and type-checking configuration for both packages
- Rebased on top of upstream src/ layout changes
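
In practice, an external provider's import changes from the server
package to the new spec package — a minimal sketch (both import paths
appear in the diff below):

```python
# Before: pulled in the full llama-stack server package
# from llama_stack.providers.datatypes import ProviderSpec

# After: depends only on the lightweight spec package
from llama_stack_api.providers.datatypes import ProviderSpec
```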

Testing

Package builds successfully and can be imported independently.
All pre-commit hooks pass with expected exclusions maintained.

Next Steps

- Publish llama-stack-api to PyPI
- Update external provider dependencies
- Re-enable external provider module tests

Signed-off-by: Charlie Doern <cdoern@redhat.com>
Author: Charlie Doern <cdoern@redhat.com>
Date:   2025-10-30 12:25:23 -04:00
Parent: e5a55f3677
Commit: 85d407c2a0
359 changed files with 1259 additions and 980 deletions

@@ -30,13 +30,16 @@ jobs:
           activate-environment: true
           version: 0.7.6
-      - name: Build Llama Stack package
-        run: |
-          uv build
-      - name: Install Llama Stack package
+      - name: Build Llama Stack Spec package
+        working-directory: src/llama-stack-api
+        run: uv build
+      - name: Build Llama Stack package
+        run: uv build
+      - name: Install Llama Stack package (with spec from local build)
         run: |
-          uv pip install dist/*.whl
+          uv pip install --find-links src/llama-stack-api/dist dist/*.whl
       - name: Verify Llama Stack package
         run: |

@@ -42,7 +42,7 @@ repos:
     hooks:
       - id: ruff
         args: [ --fix ]
-        exclude: ^src/llama_stack/strong_typing/.*$
+        exclude: ^(src/llama_stack/strong_typing/.*|src/llama-stack-api/llama_stack_api/strong_typing/.*)$
       - id: ruff-format
   - repo: https://github.com/adamchainz/blacken-docs
@@ -58,7 +58,7 @@ External APIs must expose a `available_providers()` function in their module tha
 ```python
 # llama_stack_api_weather/api.py
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
 
 def available_providers() -> list[ProviderSpec]:
@@ -79,7 +79,7 @@ A Protocol class like so:
 # llama_stack_api_weather/api.py
 from typing import Protocol
 
-from llama_stack.schema_utils import webmethod
+from llama_stack_api.schema_utils import webmethod
 
 class WeatherAPI(Protocol):
@@ -151,12 +151,12 @@ __all__ = ["WeatherAPI", "available_providers"]
 # llama-stack-api-weather/src/llama_stack_api_weather/weather.py
 from typing import Protocol
 
-from llama_stack.providers.datatypes import (
+from llama_stack_api.providers.datatypes import (
     Api,
     ProviderSpec,
     RemoteProviderSpec,
 )
-from llama_stack.schema_utils import webmethod
+from llama_stack_api.schema_utils import webmethod
 
 def available_providers() -> list[ProviderSpec]:
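
For context, a filled-in `available_providers()` for this hypothetical
weather API might look like the sketch below; the "kaze" provider name,
module path, config class, and field values are illustrative
assumptions, not part of this diff:

```python
# llama_stack_api_weather/weather.py — illustrative sketch only
from llama_stack_api.providers.datatypes import (
    Api,
    ProviderSpec,
    RemoteProviderSpec,
)


def available_providers() -> list[ProviderSpec]:
    return [
        RemoteProviderSpec(
            api=Api.weather,  # the externally registered "weather" API
            provider_type="remote::kaze",  # assumed provider name
            module="llama_stack_provider_kaze",  # package implementing it
            config_class="llama_stack_provider_kaze.KazeProviderConfig",
            pip_packages=["llama_stack_provider_kaze"],
        ),
    ]
```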

@@ -65,7 +65,7 @@ external_providers_dir: /workspace/providers.d
 Inside `providers.d/custom_ollama/provider.py`, define `get_provider_spec()` so the CLI can discover dependencies:
 
 ```python
-from llama_stack.providers.datatypes import ProviderSpec
+from llama_stack_api.providers.datatypes import ProviderSpec
 
 def get_provider_spec() -> ProviderSpec:

@@ -80,7 +80,7 @@ container_image: custom-vector-store:latest # optional
 All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class. The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:
 
 ```python
-from llama_stack.providers.datatypes import (
+from llama_stack_api.providers.datatypes import (
     ProviderSpec,
     Api,
     RemoteProviderSpec,
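
A complete version of such a function, using the `custom_ollama`
provider from the earlier doc page, might look like this sketch; the
module path, config class, and pip packages are assumed example values:

```python
# providers.d/custom_ollama/provider.py — illustrative sketch only
from llama_stack_api.providers.datatypes import (
    Api,
    ProviderSpec,
    RemoteProviderSpec,
)


def get_provider_spec() -> ProviderSpec:
    # Returns the spec the CLI uses to discover the provider's
    # dependencies and config class (values assumed for illustration).
    return RemoteProviderSpec(
        api=Api.inference,
        provider_type="remote::custom_ollama",
        module="llama_stack_ollama_provider",
        config_class="llama_stack_ollama_provider.config.OllamaImplConfig",
        pip_packages=["ollama", "aiohttp"],
    )
```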

@@ -153,7 +153,7 @@ description: |
 Example using RAGQueryConfig with different search modes:
 
 ```python
-from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+from llama_stack_api.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
 
 # Vector search
 config = RAGQueryConfig(mode="vector", max_chunks=5)
@@ -358,7 +358,7 @@ Two ranker types are supported:
 Example using RAGQueryConfig with different search modes:
 
 ```python
-from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+from llama_stack_api.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
 
 # Vector search
 config = RAGQueryConfig(mode="vector", max_chunks=5)
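
Beyond the plain vector search shown in the hunk, the same config
selects the other modes with the two rankers mentioned in the doc
context — a sketch, assuming these parameter names (`impact_factor`,
`alpha`) and values for illustration:

```python
from llama_stack_api.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker

# Keyword search
config = RAGQueryConfig(mode="keyword", max_chunks=5)

# Hybrid search with reciprocal-rank-fusion ranker (assumed impact_factor)
config = RAGQueryConfig(
    mode="hybrid",
    max_chunks=5,
    ranker=RRFRanker(impact_factor=60.0),
)

# Hybrid search with weighted ranker (alpha blends keyword vs. vector scores)
config = RAGQueryConfig(
    mode="hybrid",
    max_chunks=5,
    ranker=WeightedRanker(alpha=0.7),
)
```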

@@ -16,7 +16,7 @@ import sys
 import fire
 import ruamel.yaml as yaml
 
-from llama_stack.apis.version import LLAMA_STACK_API_V1  # noqa: E402
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1  # noqa: E402
 from llama_stack.core.stack import LlamaStack  # noqa: E402
 
 from .pyopenapi.options import Options  # noqa: E402

@@ -16,10 +16,10 @@ from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union
 from fastapi import UploadFile
 
-from llama_stack.apis.datatypes import Error
-from llama_stack.strong_typing.core import JsonType
-from llama_stack.strong_typing.docstring import Docstring, parse_type
-from llama_stack.strong_typing.inspection import (
+from llama_stack_api.apis.datatypes import Error
+from llama_stack_api.strong_typing.core import JsonType
+from llama_stack_api.strong_typing.docstring import Docstring, parse_type
+from llama_stack_api.strong_typing.inspection import (
     is_generic_list,
     is_type_optional,
     is_type_union,
@@ -28,15 +28,15 @@ from llama_stack.strong_typing.inspection import (
     unwrap_optional_type,
     unwrap_union_types,
 )
-from llama_stack.strong_typing.name import python_type_to_name
-from llama_stack.strong_typing.schema import (
+from llama_stack_api.strong_typing.name import python_type_to_name
+from llama_stack_api.strong_typing.schema import (
     get_schema_identifier,
     JsonSchemaGenerator,
     register_schema,
     Schema,
     SchemaOptions,
 )
-from llama_stack.strong_typing.serialization import json_dump_string, object_to_json
+from llama_stack_api.strong_typing.serialization import json_dump_string, object_to_json
 from pydantic import BaseModel
 
 from .operations import (

@@ -11,11 +11,11 @@ import typing
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
 
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
 from termcolor import colored
 
-from llama_stack.strong_typing.inspection import get_signature
+from llama_stack_api.strong_typing.inspection import get_signature
 from typing import get_origin, get_args
@@ -23,7 +23,7 @@ from fastapi import UploadFile
 from fastapi.params import File, Form
 from typing import Annotated
 
-from llama_stack.schema_utils import ExtraBodyField
+from llama_stack_api.schema_utils import ExtraBodyField
 
 def split_prefix(

@@ -9,7 +9,7 @@ import enum
 from dataclasses import dataclass
 from typing import Any, ClassVar, Dict, List, Optional, Union
 
-from llama_stack.strong_typing.schema import JsonType, Schema, StrictJsonType
+from llama_stack_api.strong_typing.schema import JsonType, Schema, StrictJsonType
 
 URL = str

@@ -11,8 +11,8 @@ from pathlib import Path
 from typing import Any, List, Optional, TextIO, Union, get_type_hints, get_origin, get_args
 
 from pydantic import BaseModel
 
-from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
-from llama_stack.strong_typing.inspection import is_unwrapped_body_param
+from llama_stack_api.strong_typing.schema import object_to_json, StrictJsonType
+from llama_stack_api.strong_typing.inspection import is_unwrapped_body_param
 from llama_stack.core.resolver import api_protocol_map
 
 from .generator import Generator

@@ -31,6 +31,7 @@ dependencies = [
     "httpx",
     "jinja2>=3.1.6",
     "jsonschema",
+    "llama-stack-api", # API and provider specifications (local dev via tool.uv.sources)
     "openai>=2.5.0",
     "prompt-toolkit",
     "python-dotenv",
@@ -180,7 +181,7 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p
 [tool.setuptools.packages.find]
 where = ["src"]
-include = ["llama_stack", "llama_stack.*"]
+include = ["llama_stack", "llama_stack.*", "llama-stack-api", "llama-stack-api.*"]
 
 [[tool.uv.index]]
 name = "pytorch-cpu"
@@ -190,6 +191,7 @@ explicit = true
 [tool.uv.sources]
 torch = [{ index = "pytorch-cpu" }]
 torchvision = [{ index = "pytorch-cpu" }]
+llama-stack-api = [{ path = "src/llama-stack-api", editable = true }]
 
 [tool.ruff]
 line-length = 120
@@ -256,8 +258,8 @@ unfixable = [
 ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API
 
 [tool.mypy]
-mypy_path = ["src"]
-packages = ["llama_stack"]
+mypy_path = ["src", "src/llama-stack-api"]
+packages = ["llama_stack", "llama_stack_api"]
 plugins = ['pydantic.mypy']
 disable_error_code = []
 warn_return_any = true
@@ -279,15 +281,18 @@ exclude = [
     "^src/llama_stack/core/store/registry\\.py$",
     "^src/llama_stack/core/utils/exec\\.py$",
     "^src/llama_stack/core/utils/prompt_for_config\\.py$",
+    # Moved to llama-stack-api but still excluded
     "^src/llama_stack/models/llama/llama3/interface\\.py$",
     "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
     "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
-    "^src/llama_stack/providers/inline/datasetio/localfs/",
-    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
-    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
     "^src/llama_stack/models/llama/llama3/generation\\.py$",
     "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
     "^src/llama_stack/models/llama/llama4/",
+    "^src/llama-stack-api/llama_stack_api/core/telemetry/telemetry\\.py$",
+    "^src/llama_stack/providers/inline/agents/meta_reference/",
+    "^src/llama_stack/providers/inline/datasetio/localfs/",
+    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
+    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
     "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
     "^src/llama_stack/providers/inline/post_training/common/validator\\.py$",
     "^src/llama_stack/providers/inline/safety/code_scanner/",
@@ -337,7 +342,9 @@ exclude = [
     "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
     "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
     "^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
-    "^src/llama_stack/strong_typing/auxiliary\\.py$",
+    "^src/llama-stack-api/llama_stack_api/core/telemetry/trace_protocol\\.py$",
+    "^src/llama-stack-api/llama_stack_api/core/telemetry/tracing\\.py$",
+    "^src/llama-stack-api/llama_stack_api/strong_typing/auxiliary\\.py$",
     "^src/llama_stack/distributions/template\\.py$",
 ]

@@ -14,8 +14,8 @@ import os
 from pathlib import Path
 
 import fire
+from llama_stack_api.apis.common.errors import ModelNotFoundError
 
-from llama_stack.apis.common.errors import ModelNotFoundError
 from llama_stack.models.llama.llama3.generation import Llama3
 from llama_stack.models.llama.llama4.generation import Llama4
 from llama_stack.models.llama.sku_list import resolve_model

@@ -22,7 +22,7 @@ def get_api_docstring(api_name: str) -> str | None:
     """Extract docstring from the API protocol class."""
     try:
         # Import the API module dynamically
-        api_module = __import__(f"llama_stack.apis.{api_name}", fromlist=[api_name.title()])
+        api_module = __import__(f"llama_stack_api.apis.{api_name}", fromlist=[api_name.title()])
 
         # Get the main protocol class (usually capitalized API name)
         protocol_class_name = api_name.title()
@@ -83,8 +83,9 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]:
                 # this string replace is ridiculous
                 field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "")
                 field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "")
-                field_type = field_type.replace("llama_stack.apis.inference.inference.", "")
+                field_type = field_type.replace("llama_stack_api.apis.inference.inference.", "")
                 field_type = field_type.replace("llama_stack.providers.", "")
+                field_type = field_type.replace("llama_stack_api.providers.", "")
 
                 default_value = field.default
                 if field.default_factory is not None:

@@ -0,0 +1,103 @@
# llama-stack-api
API and Provider specifications for Llama Stack - a lightweight package with protocol definitions and provider specs.
## Overview
`llama-stack-api` is a minimal dependency package that contains:
- **API Protocol Definitions**: Type-safe protocol definitions for all Llama Stack APIs (inference, agents, safety, etc.)
- **Provider Specifications**: Provider spec definitions for building custom providers
- **Data Types**: Shared data types and models used across the Llama Stack ecosystem
- **Type Utilities**: Strong typing utilities and schema validation
## What This Package Does NOT Include
- Server implementation (see `llama-stack` package)
- Provider implementations (see `llama-stack` package)
- CLI tools (see `llama-stack` package)
- Runtime orchestration (see `llama-stack` package)
## Use Cases
This package is designed for:
1. **Third-party Provider Developers**: Build custom providers without depending on the full Llama Stack server
2. **Client Library Authors**: Use type definitions without server dependencies
3. **Documentation Generation**: Generate API docs from protocol definitions
4. **Type Checking**: Validate implementations against the official specs
## Installation
```bash
pip install llama-stack-api
```
Or with uv:
```bash
uv pip install llama-stack-api
```
## Dependencies
Minimal dependencies:
- `pydantic>=2.11.9` - For data validation and serialization
- `jsonschema` - For JSON schema utilities
## Versioning
This package follows semantic versioning independently from the main `llama-stack` package:
- **Patch versions** (0.1.x): Documentation, internal improvements
- **Minor versions** (0.x.0): New APIs, backward-compatible changes
- **Major versions** (x.0.0): Breaking changes to existing APIs
Current version: **0.1.0**
## Usage Example
```python
from llama_stack_api.apis.inference import Inference, ChatCompletionRequest
from llama_stack_api.providers.datatypes import ProviderSpec, InlineProviderSpec
from llama_stack_api.apis.datatypes import Api
# Use protocol definitions for type checking
class MyInferenceProvider(Inference):
async def chat_completion(self, request: ChatCompletionRequest):
# Your implementation
pass
# Define provider specifications
my_provider_spec = InlineProviderSpec(
api=Api.inference,
provider_type="inline::my-provider",
pip_packages=["my-dependencies"],
module="my_package.providers.inference",
config_class="my_package.providers.inference.MyConfig",
)
```
## Relationship to llama-stack
The main `llama-stack` package depends on `llama-stack-api` and provides:
- Full server implementation
- Built-in provider implementations
- CLI tools for running and managing stacks
- Runtime provider resolution and orchestration
## Contributing
See the main [Llama Stack repository](https://github.com/llamastack/llama-stack) for contribution guidelines.
## License
MIT License - see LICENSE file for details.
## Links
- [Main Llama Stack Repository](https://github.com/llamastack/llama-stack)
- [Documentation](https://llamastack.ai/)
- [Client Library](https://pypi.org/project/llama-stack-client/)

@@ -0,0 +1,26 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Llama Stack API Specifications
This package contains the API definitions, data types, and protocol specifications
for Llama Stack. It is designed to be a lightweight dependency for external providers
and clients that need to interact with Llama Stack APIs without requiring the full
server implementation.
Key components:
- apis: Protocol definitions for all Llama Stack APIs
- providers: Provider interface specifications
- strong_typing: Type system utilities
- schema_utils: Schema validation and utilities
"""
__version__ = "0.1.0"
from . import apis, providers, schema_utils, strong_typing # noqa: F401
__all__ = ["apis", "providers", "schema_utils", "strong_typing"]

@@ -9,9 +9,9 @@ from typing import Annotated, Protocol, runtime_checkable
 from pydantic import BaseModel
 
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import ExtraBodyField, json_schema_type, webmethod
+from llama_stack_api.apis.common.responses import Order
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import ExtraBodyField, json_schema_type, webmethod
 
 from .openai_responses import (
     ListOpenAIResponseInputItem,

@@ -10,8 +10,8 @@ from typing import Annotated, Any, Literal
 from pydantic import BaseModel, Field, model_validator
 from typing_extensions import TypedDict
 
-from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
-from llama_stack.schema_utils import json_schema_type, register_schema
+from llama_stack_api.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
+from llama_stack_api.schema_utils import json_schema_type, register_schema
 
 # NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably
 # take their YAML and generate this file automatically. Their YAML is available.

@@ -8,8 +8,8 @@ from typing import Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
 
 try:
     from openai.types import Batch as BatchObject

@@ -7,9 +7,9 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1ALPHA
+from llama_stack_api.schema_utils import json_schema_type, webmethod
 
 class CommonBenchmarkFields(BaseModel):

@@ -4,13 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
 from typing import Annotated, Literal
 
 from pydantic import BaseModel, Field, model_validator
 
-from llama_stack.models.llama.datatypes import ToolCall
-from llama_stack.schema_utils import json_schema_type, register_schema
+from llama_stack_api.schema_utils import json_schema_type, register_schema
 
 @json_schema_type
@@ -101,43 +99,3 @@ class ImageDelta(BaseModel):
     type: Literal["image"] = "image"
     image: bytes
-
-
-class ToolCallParseStatus(Enum):
-    """Status of tool call parsing during streaming.
-    :cvar started: Tool call parsing has begun
-    :cvar in_progress: Tool call parsing is ongoing
-    :cvar failed: Tool call parsing failed
-    :cvar succeeded: Tool call parsing completed successfully
-    """
-
-    started = "started"
-    in_progress = "in_progress"
-    failed = "failed"
-    succeeded = "succeeded"
-
-
-@json_schema_type
-class ToolCallDelta(BaseModel):
-    """A tool call content delta for streaming responses.
-    :param type: Discriminator type of the delta. Always "tool_call"
-    :param tool_call: Either an in-progress tool call string or the final parsed tool call
-    :param parse_status: Current parsing status of the tool call
-    """
-
-    type: Literal["tool_call"] = "tool_call"
-    # you either send an in-progress tool call so the client can stream a long
-    # code generation or you send the final parsed tool call at the end of the
-    # stream
-    tool_call: str | ToolCall
-    parse_status: ToolCallParseStatus
-
-
-# streaming completions send a stream of ContentDeltas
-ContentDelta = Annotated[
-    TextDelta | ImageDelta | ToolCallDelta,
-    Field(discriminator="type"),
-]
-register_schema(ContentDelta, name="ContentDelta")

@@ -7,7 +7,7 @@ from enum import Enum
 
 from pydantic import BaseModel
 
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.schema_utils import json_schema_type
 
 class JobStatus(Enum):

@@ -9,7 +9,7 @@ from typing import Any
 
 from pydantic import BaseModel
 
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.schema_utils import json_schema_type
 
 class Order(Enum):

@@ -8,7 +8,7 @@ from datetime import datetime
 
 from pydantic import BaseModel
 
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.schema_utils import json_schema_type
 
 @json_schema_type

@@ -8,7 +8,7 @@ from typing import Annotated, Literal
 
 from pydantic import BaseModel, Field
 
-from llama_stack.schema_utils import json_schema_type, register_schema
+from llama_stack_api.schema_utils import json_schema_type, register_schema
 
 @json_schema_type

@@ -9,7 +9,7 @@ from typing import Annotated, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.agents.openai_responses import (
+from llama_stack_api.apis.agents.openai_responses import (
     OpenAIResponseInputFunctionToolCallOutput,
     OpenAIResponseMCPApprovalRequest,
     OpenAIResponseMCPApprovalResponse,
@@ -20,9 +20,9 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageMCPListTools,
     OpenAIResponseOutputMessageWebSearchToolCall,
 )
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
 
 Metadata = dict[str, str]

@@ -6,10 +6,10 @@
 
 from typing import Any, Protocol, runtime_checkable
 
-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.datasets import Dataset
-from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
-from llama_stack.schema_utils import webmethod
+from llama_stack_api.apis.common.responses import PaginatedResponse
+from llama_stack_api.apis.datasets import Dataset
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1BETA
+from llama_stack_api.schema_utils import webmethod
 
 class DatasetStore(Protocol):

@@ -9,9 +9,9 @@ from typing import Annotated, Any, Literal, Protocol
 
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1BETA
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
 
 class DatasetPurpose(StrEnum):

@@ -8,7 +8,7 @@ from enum import Enum, EnumMeta
 
 from pydantic import BaseModel, Field
 
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.schema_utils import json_schema_type
 
 class DynamicApiMeta(EnumMeta):

@@ -8,12 +8,12 @@ from typing import Any, Literal, Protocol
 
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.common.job_types import Job
-from llama_stack.apis.inference import SamplingParams, SystemMessage
-from llama_stack.apis.scoring import ScoringResult
-from llama_stack.apis.scoring_functions import ScoringFnParams
-from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.apis.common.job_types import Job
+from llama_stack_api.apis.inference import SamplingParams, SystemMessage
+from llama_stack_api.apis.scoring import ScoringResult
+from llama_stack_api.apis.scoring_functions import ScoringFnParams
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1ALPHA
+from llama_stack_api.schema_utils import json_schema_type, webmethod
 
 @json_schema_type

@@ -10,10 +10,10 @@ from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable
 from fastapi import File, Form, Response, UploadFile
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.apis.common.responses import Order
+from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
 
 # OpenAI Files API Models

@@ -18,14 +18,14 @@ from fastapi import Body
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict
 
-from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.common.responses import (
+from llama_stack_api.apis.common.content_types import InterleavedContent
+from llama_stack_api.apis.common.responses import (
     Order,
 )
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.models import Model
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.apis.models import Model
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
 
 @json_schema_type

@@ -8,11 +8,11 @@ from typing import Literal, Protocol, runtime_checkable
 
 from pydantic import BaseModel
 
-from llama_stack.apis.version import (
+from llama_stack_api.apis.version import (
     LLAMA_STACK_API_V1,
 )
-from llama_stack.providers.datatypes import HealthStatus
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.providers.datatypes import HealthStatus
+from llama_stack_api.schema_utils import json_schema_type, webmethod
 
 # Valid values for the route filter parameter.
 # Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated)

@@ -9,10 +9,10 @@ from typing import Any, Literal, Protocol, runtime_checkable
 
 from pydantic import BaseModel, ConfigDict, Field, field_validator
 
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
 
 class CommonModelFields(BaseModel):

@@ -10,11 +10,11 @@ from typing import Annotated, Any, Literal, Protocol
 
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.job_types import JobStatus
-from llama_stack.apis.common.training_types import Checkpoint
-from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.apis.common.content_types import URL
+from llama_stack_api.apis.common.job_types import JobStatus
+from llama_stack_api.apis.common.training_types import Checkpoint
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1ALPHA
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
 
 @json_schema_type

@@ -10,9 +10,9 @@ from typing import Protocol, runtime_checkable
 
 from pydantic import BaseModel, Field, field_validator, model_validator
 
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
 
 @json_schema_type

@@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable
 
 from pydantic import BaseModel
 
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.datatypes import HealthResponse
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.providers.datatypes import HealthResponse
+from llama_stack_api.schema_utils import json_schema_type, webmethod
 
 @json_schema_type

@@ -9,11 +9,11 @@ from typing import Any, Protocol, runtime_checkable
 
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.shields import Shield
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.apis.inference import OpenAIMessageParam
+from llama_stack_api.apis.shields import Shield
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
 
 @json_schema_type

@@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable
 
 from pydantic import BaseModel
 
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.apis.scoring_functions import ScoringFn, ScoringFnParams
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
 
 # mapping of metric to value
 ScoringResultRow = dict[str, Any]

@@ -16,10 +16,10 @@ from typing import (
 
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.common.type_system import ParamType
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.apis.common.type_system import ParamType
+from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
 
 # Perhaps more structure can be imposed on these functions. Maybe they could be associated

@@ -8,10 +8,10 @@ from typing import Any, Literal, Protocol, runtime_checkable
 
 from pydantic import BaseModel
 
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
 
 class CommonShieldFields(BaseModel):

@@ -9,7 +9,7 @@ from typing import Annotated, Any, Literal
 
 from pydantic import BaseModel, Field, field_validator
 
-from llama_stack.apis.common.content_types import URL, InterleavedContent
+from llama_stack_api.apis.common.content_types import URL, InterleavedContent
 
 class RRFRanker(BaseModel):

@@ -10,11 +10,11 @@ from typing import Any, Literal, Protocol
 
 from pydantic import BaseModel
 from typing_extensions import runtime_checkable
 
-from llama_stack.apis.common.content_types import URL, InterleavedContent
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.apis.common.content_types import URL, InterleavedContent
+from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
 
 @json_schema_type

@@ -13,12 +13,12 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
 from fastapi import Body
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.common.tracing import telemetry_traceable
-from llama_stack.apis.inference import InterleavedContent
-from llama_stack.apis.vector_stores import VectorStore
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
-from llama_stack.strong_typing.schema import register_schema
+from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.apis.inference import InterleavedContent
+from llama_stack_api.apis.vector_stores import VectorStore
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.strong_typing.schema import register_schema
 
 @json_schema_type

@@ -8,7 +8,7 @@ from typing import Literal
 
 from pydantic import BaseModel
 
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack_api.apis.resource import Resource, ResourceType
 
 # Internal resource type for storing the vector store routing and other information

@@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .datatypes import * # noqa: F403

@@ -10,15 +10,15 @@ from urllib.parse import urlparse
 
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.benchmarks import Benchmark
-from llama_stack.apis.datasets import Dataset
-from llama_stack.apis.datatypes import Api
-from llama_stack.apis.models import Model
-from llama_stack.apis.scoring_functions import ScoringFn
-from llama_stack.apis.shields import Shield
-from llama_stack.apis.tools import ToolGroup
-from llama_stack.apis.vector_stores import VectorStore
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api.apis.benchmarks import Benchmark
+from llama_stack_api.apis.datasets import Dataset
+from llama_stack_api.apis.datatypes import Api
+from llama_stack_api.apis.models import Model
+from llama_stack_api.apis.scoring_functions import ScoringFn
+from llama_stack_api.apis.shields import Shield
+from llama_stack_api.apis.tools import ToolGroup
+from llama_stack_api.apis.vector_stores import VectorStore
+from llama_stack_api.schema_utils import json_schema_type
 
 class ModelsProtocolPrivate(Protocol):

@@ -0,0 +1,82 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[tool.uv]
required-version = ">=0.7.0"
[project]
name = "llama-stack-api"
version = "0.1.0"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "API and Provider specifications for Llama Stack - lightweight package with protocol definitions and provider specs"
readme = "README.md"
requires-python = ">=3.12"
license = { "text" = "MIT" }
classifiers = [
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Operating System :: OS Independent",
"Intended Audience :: Developers",
"Intended Audience :: Information Technology",
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Information Analysis",
]
dependencies = [
"pydantic>=2.11.9",
"jsonschema",
"opentelemetry-sdk>=1.30.0",
"opentelemetry-exporter-otlp-proto-http>=1.30.0",
]
[project.urls]
Homepage = "https://github.com/llamastack/llama-stack"
[tool.setuptools.packages.find]
where = ["."]
include = ["llama_stack_api", "llama_stack_api.*"]
[tool.setuptools.package-data]
llama_stack_api = ["py.typed"]
[tool.ruff]
line-length = 120
[tool.ruff.lint]
select = [
"UP", # pyupgrade
"B", # flake8-bugbear
"B9", # flake8-bugbear subset
"C", # comprehensions
"E", # pycodestyle
"F", # Pyflakes
"N", # Naming
"W", # Warnings
"DTZ", # datetime rules
"I", # isort (imports order)
"RUF001", # Checks for ambiguous Unicode characters in strings
"RUF002", # Checks for ambiguous Unicode characters in docstrings
"RUF003", # Checks for ambiguous Unicode characters in comments
"PLC2401", # Checks for the use of non-ASCII characters in variable names
]
ignore = [
# The following ignores are desired by the project maintainers.
"E402", # Module level import not at top of file
"E501", # Line too long
"F405", # Maybe undefined or defined from star import
"C408", # Ignored because we like the dict keyword argument syntax
"N812", # Ignored because import torch.nn.functional as F is PyTorch convention
# These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
"C901", # Complexity of the function is too high
]
unfixable = [
"PLE2515",
] # Do not fix this automatically since ruff will replace the zero-width space with \u200b - let's do it manually
[tool.ruff.lint.per-file-ignores]
"llama_stack_api/apis/**/__init__.py" = ["F403"]
[tool.ruff.lint.pep8-naming]
classmethod-decorators = ["classmethod", "pydantic.field_validator"]

@@ -9,6 +9,7 @@ import sys
 from pathlib import Path
 
 import yaml
+from llama_stack_api.providers.datatypes import Api
 from termcolor import cprint
 
 from llama_stack.cli.stack.utils import ImageType
@@ -21,7 +22,6 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.stack import replace_env_vars
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api
 
 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"

@@ -11,6 +11,7 @@ from functools import lru_cache
 from pathlib import Path
 
 import yaml
+from llama_stack_api.providers.datatypes import Api
 from termcolor import cprint
 
 from llama_stack.core.datatypes import (
@@ -32,7 +33,6 @@ from llama_stack.core.storage.datatypes import (
 from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.image_types import LlamaStackImageType
-from llama_stack.providers.datatypes import Api
 
 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"

@@ -7,6 +7,7 @@
 import importlib.resources
 import sys
 
+from llama_stack_api.providers.datatypes import Api
 from pydantic import BaseModel
 from termcolor import cprint
 
@@ -17,7 +18,6 @@ from llama_stack.core.utils.exec import run_command
 from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.distributions.template import DistributionTemplate
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api
 
 log = get_logger(name=__name__, category="core")

@@ -12,11 +12,10 @@ from enum import Enum
 from typing import Any, Union, get_args, get_origin
 
 import httpx
+from llama_stack_api.providers.datatypes import RemoteProviderConfig
 from pydantic import BaseModel, parse_obj_as
 from termcolor import cprint
 
-from llama_stack.providers.datatypes import RemoteProviderConfig
-
 _CLIENT_CLASSES = {}

@@ -6,6 +6,8 @@
 import textwrap
 from typing import Any
 
+from llama_stack_api.providers.datatypes import Api, ProviderSpec
+
 from llama_stack.core.datatypes import (
     LLAMA_STACK_RUN_CONFIG_VERSION,
     DistributionSpec,
@@ -20,7 +22,6 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.prompt_for_config import prompt_for_config
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api, ProviderSpec
 
 logger = get_logger(name=__name__, category="core")

@@ -8,9 +8,7 @@ import secrets
 import time
 from typing import Any, Literal
 
-from pydantic import BaseModel, TypeAdapter
-
-from llama_stack.apis.conversations.conversations import (
+from llama_stack_api.apis.conversations.conversations import (
     Conversation,
     ConversationDeletedResource,
     ConversationItem,
@@ -20,6 +18,8 @@ from llama_stack.apis.conversations.conversations import (
     Conversations,
     Metadata,
 )
+from pydantic import BaseModel, TypeAdapter
+
 from llama_stack.core.datatypes import AccessRule, StackRunConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType

@@ -9,22 +9,23 @@ from pathlib import Path
 from typing import Annotated, Any, Literal, Self
 from urllib.parse import urlparse
 
+from llama_stack_api.apis.benchmarks import Benchmark, BenchmarkInput
+from llama_stack_api.apis.datasetio import DatasetIO
+from llama_stack_api.apis.datasets import Dataset, DatasetInput
+from llama_stack_api.apis.eval import Eval
+from llama_stack_api.apis.inference import Inference
+from llama_stack_api.apis.models import Model, ModelInput
+from llama_stack_api.apis.resource import Resource
+from llama_stack_api.apis.safety import Safety
+from llama_stack_api.apis.scoring import Scoring
+from llama_stack_api.apis.scoring_functions import ScoringFn, ScoringFnInput
+from llama_stack_api.apis.shields import Shield, ShieldInput
+from llama_stack_api.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
+from llama_stack_api.apis.vector_io import VectorIO
+from llama_stack_api.apis.vector_stores import VectorStore, VectorStoreInput
+from llama_stack_api.providers.datatypes import Api, ProviderSpec
 from pydantic import BaseModel, Field, field_validator, model_validator
 
-from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Dataset, DatasetInput
-from llama_stack.apis.eval import Eval
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.models import Model, ModelInput
-from llama_stack.apis.resource import Resource
-from llama_stack.apis.safety import Safety
-from llama_stack.apis.scoring import Scoring
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
-from llama_stack.apis.shields import Shield, ShieldInput
-from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
-from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
 from llama_stack.core.access_control.datatypes import AccessRule
 from llama_stack.core.storage.datatypes import (
     KVStoreReference,
@@ -32,7 +33,6 @@ from llama_stack.core.storage.datatypes import (
     StorageConfig,
 )
 from llama_stack.log import LoggingConfig
-from llama_stack.providers.datatypes import Api, ProviderSpec
 
 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
 LLAMA_STACK_RUN_CONFIG_VERSION = 2

Some files were not shown because too many files have changed in this diff.