feat: refactor llama-stack-api structure

Move llama_stack_api.apis... to the top level of llama_stack_api. Merge the provider datatypes and the existing apis.datatypes into a common llama_stack_api.datatypes. Update all usages of these packages throughout LLS. Signed-off-by: Charlie Doern <cdoern@redhat.com>
2025-12-03 09:53:45 +00:00 · 2025-11-12 15:59:34 -05:00 · 2025-11-12 15:59:34 -05:00 · b7480e9c88
commit b7480e9c88
parent d6b915ce0a
296 changed files with 906 additions and 1109 deletions
--- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
+++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
@ -153,7 +153,7 @@ description: |
  Example using RAGQueryConfig with different search modes:
  ```python
-  from llama_stack_api.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+  from llama_stack_api.rag_tool import RAGQueryConfig, RRFRanker, WeightedRanker
  # Vector search
  config = RAGQueryConfig(mode="vector", max_chunks=5)
@ -358,7 +358,7 @@ Two ranker types are supported:
 Example using RAGQueryConfig with different search modes:
 ```python
-from llama_stack_api.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+from llama_stack_api.rag_tool import RAGQueryConfig, RRFRanker, WeightedRanker
 # Vector search
 config = RAGQueryConfig(mode="vector", max_chunks=5)
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@ -16,7 +16,7 @@ import sys
 import fire
 import ruamel.yaml as yaml
-from llama_stack_api.apis.version import LLAMA_STACK_API_V1 # noqa: E402
+from llama_stack_api.version import LLAMA_STACK_API_V1 # noqa: E402
 from llama_stack.core.stack import LlamaStack  # noqa: E402
 from .pyopenapi.options import Options  # noqa: E402
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@ -16,7 +16,7 @@ from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union
 from fastapi import UploadFile
-from llama_stack_api.apis.datatypes import Error
+from llama_stack_api.datatypes import Error
 from llama_stack_api.strong_typing.core import JsonType
 from llama_stack_api.strong_typing.docstring import Docstring, parse_type
 from llama_stack_api.strong_typing.inspection import (
--- a/docs/openapi_generator/pyopenapi/operations.py
+++ b/docs/openapi_generator/pyopenapi/operations.py
@ -11,7 +11,7 @@ import typing
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
-from llama_stack_api.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
+from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA
 from termcolor import colored
--- a/scripts/generate_prompt_format.py
+++ b/scripts/generate_prompt_format.py
@ -14,7 +14,7 @@ import os
 from pathlib import Path
 import fire
-from llama_stack_api.apis.common.errors import ModelNotFoundError
+from llama_stack_api.common.errors import ModelNotFoundError
 from llama_stack.models.llama.llama3.generation import Llama3
 from llama_stack.models.llama.llama4.generation import Llama4
--- a/scripts/provider_codegen.py
+++ b/scripts/provider_codegen.py
@ -22,7 +22,7 @@ def get_api_docstring(api_name: str) -> str | None:
    """Extract docstring from the API protocol class."""
    try:
        # Import the API module dynamically
-        api_module = __import__(f"llama_stack_api.apis.{api_name}", fromlist=[api_name.title()])
+        api_module = __import__(f"llama_stack_api.{api_name}", fromlist=[api_name.title()])
        # Get the main protocol class (usually capitalized API name)
        protocol_class_name = api_name.title()
@ -83,9 +83,9 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]:
                # this string replace is ridiculous
                field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "")
                field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "")
-                field_type = field_type.replace("llama_stack_api.apis.inference.inference.", "")
+                field_type = field_type.replace("llama_stack_api.inference.", "")
                field_type = field_type.replace("llama_stack.providers.", "")
-                field_type = field_type.replace("llama_stack_api.providers.", "")
+                field_type = field_type.replace("llama_stack_api.datatypes.", "")
                default_value = field.default
                if field.default_factory is not None:
--- a/src/llama-stack-api/README.md
+++ b/src/llama-stack-api/README.md
@ -58,9 +58,9 @@ Current version: **0.1.0**
 ## Usage Example
 ```python
-from llama_stack_api.apis.inference import Inference, ChatCompletionRequest
+from llama_stack_api.inference import Inference, ChatCompletionRequest
 from llama_stack_api.datatypes import ProviderSpec, InlineProviderSpec
-from llama_stack_api.apis.datatypes import Api
+from llama_stack_api.datatypes import Api
 # Use protocol definitions for type checking
--- a/src/llama-stack-api/llama_stack_api/init.py
+++ b/src/llama-stack-api/llama_stack_api/init.py
@ -13,14 +13,15 @@ and clients that need to interact with Llama Stack APIs without requiring the fu
 server implementation.
 Key components:
- apis: Protocol definitions for all Llama Stack APIs
+- API modules (agents, inference, safety, etc.): Protocol definitions for all Llama Stack APIs
- providers: Provider interface specifications
+- datatypes: Core data types and provider specifications
 - common: Common data types used across APIs
 - strong_typing: Type system utilities
 - schema_utils: Schema validation and utilities
 """
 __version__ = "0.1.0"
-from . import apis, providers, schema_utils, strong_typing  # noqa: F401
+from . import common, datatypes, schema_utils, strong_typing  # noqa: F401
-__all__ = ["apis", "providers", "schema_utils", "strong_typing"]
+__all__ = ["common", "datatypes", "schema_utils", "strong_typing"]
--- a/src/llama-stack-api/llama_stack_api/apis/agents/agents.py
+++ b/src/llama-stack-api/llama_stack_api/apis/agents/agents.py
@ -9,9 +9,9 @@ from typing import Annotated, Protocol, runtime_checkable
 from pydantic import BaseModel
-from llama_stack_api.apis.common.responses import Order
+from llama_stack_api.common.responses import Order
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1
 from llama_stack_api.schema_utils import ExtraBodyField, json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
 from .openai_responses import (
    ListOpenAIResponseInputItem,
--- a/src/llama-stack-api/llama_stack_api/apis/agents/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/agents/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .agents import *
--- a/src/llama-stack-api/llama_stack_api/apis/batches/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/batches/init.py
@ -1,9 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .batches import Batches, BatchObject, ListBatchesResponse
 __all__ = ["Batches", "BatchObject", "ListBatchesResponse"]
--- a/src/llama-stack-api/llama_stack_api/apis/benchmarks/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/benchmarks/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .benchmarks import *
--- a/src/llama-stack-api/llama_stack_api/apis/common/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/common/init.py
@ -1,5 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
--- a/src/llama-stack-api/llama_stack_api/apis/conversations/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/conversations/init.py
@ -1,27 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .conversations import (
    Conversation,
    ConversationDeletedResource,
    ConversationItem,
    ConversationItemCreateRequest,
    ConversationItemDeletedResource,
    ConversationItemList,
    Conversations,
    Metadata,
 )
 __all__ = [
    "Conversation",
    "ConversationDeletedResource",
    "ConversationItem",
    "ConversationItemCreateRequest",
    "ConversationItemDeletedResource",
    "ConversationItemList",
    "Conversations",
    "Metadata",
 ]
--- a/src/llama-stack-api/llama_stack_api/apis/datasetio/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/datasetio/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .datasetio import *
--- a/src/llama-stack-api/llama_stack_api/apis/datasets/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/datasets/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .datasets import *
--- a/src/llama-stack-api/llama_stack_api/apis/datatypes.py
+++ b/src/llama-stack-api/llama_stack_api/apis/datatypes.py
@ -1,158 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from enum import Enum, EnumMeta
 from pydantic import BaseModel, Field
 from llama_stack_api.schema_utils import json_schema_type
 class DynamicApiMeta(EnumMeta):
    def __new__(cls, name, bases, namespace):
        # Store the original enum values
        original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}
        # Create the enum class
        cls = super().__new__(cls, name, bases, namespace)
        # Store the original values for reference
        cls._original_values = original_values
        # Initialize _dynamic_values
        cls._dynamic_values = {}
        return cls
    def __call__(cls, value):
        try:
            return super().__call__(value)
        except ValueError as e:
            # If this value was already dynamically added, return it
            if value in cls._dynamic_values:
                return cls._dynamic_values[value]
            # If the value doesn't exist, create a new enum member
            # Create a new member name from the value
            member_name = value.lower().replace("-", "_")
            # If this member name already exists in the enum, return the existing member
            if member_name in cls._member_map_:
                return cls._member_map_[member_name]
            # Instead of creating a new member, raise ValueError to force users to use Api.add() to
            # register new APIs explicitly
            raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e
    def __iter__(cls):
        # Allow iteration over both static and dynamic members
        yield from super().__iter__()
        if hasattr(cls, "_dynamic_values"):
            yield from cls._dynamic_values.values()
    def add(cls, value):
        """
        Add a new API to the enum.
        Used to register external APIs.
        """
        member_name = value.lower().replace("-", "_")
        # If this member name already exists in the enum, return it
        if member_name in cls._member_map_:
            return cls._member_map_[member_name]
        # Create a new enum member
        member = object.__new__(cls)
        member._name_ = member_name
        member._value_ = value
        # Add it to the enum class
        cls._member_map_[member_name] = member
        cls._member_names_.append(member_name)
        cls._member_type_ = str
        # Store it in our dynamic values
        cls._dynamic_values[value] = member
        return member
@json_schema_type
 class Api(Enum, metaclass=DynamicApiMeta):
    """Enumeration of all available APIs in the Llama Stack system.
    :cvar providers: Provider management and configuration
    :cvar inference: Text generation, chat completions, and embeddings
    :cvar safety: Content moderation and safety shields
    :cvar agents: Agent orchestration and execution
    :cvar batches: Batch processing for asynchronous API requests
    :cvar vector_io: Vector database operations and queries
    :cvar datasetio: Dataset input/output operations
    :cvar scoring: Model output evaluation and scoring
    :cvar eval: Model evaluation and benchmarking framework
    :cvar post_training: Fine-tuning and model training
    :cvar tool_runtime: Tool execution and management
    :cvar telemetry: Observability and system monitoring
    :cvar models: Model metadata and management
    :cvar shields: Safety shield implementations
    :cvar datasets: Dataset creation and management
    :cvar scoring_functions: Scoring function definitions
    :cvar benchmarks: Benchmark suite management
    :cvar tool_groups: Tool group organization
    :cvar files: File storage and management
    :cvar prompts: Prompt versions and management
    :cvar inspect: Built-in system inspection and introspection
    """
    providers = "providers"
    inference = "inference"
    safety = "safety"
    agents = "agents"
    batches = "batches"
    vector_io = "vector_io"
    datasetio = "datasetio"
    scoring = "scoring"
    eval = "eval"
    post_training = "post_training"
    tool_runtime = "tool_runtime"
    models = "models"
    shields = "shields"
    vector_stores = "vector_stores"  # only used for routing table
    datasets = "datasets"
    scoring_functions = "scoring_functions"
    benchmarks = "benchmarks"
    tool_groups = "tool_groups"
    files = "files"
    prompts = "prompts"
    conversations = "conversations"
    # built-in API
    inspect = "inspect"
@json_schema_type
 class Error(BaseModel):
    """
    Error response from the API. Roughly follows RFC 7807.
    :param status: HTTP status code
    :param title: Error title, a short summary of the error which is invariant for an error type
    :param detail: Error detail, a longer human-readable description of the error
    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
    """
    status: int
    title: str
    detail: str
    instance: str | None = None
 class ExternalApiSpec(BaseModel):
    """Specification for an external API implementation."""
    module: str = Field(..., description="Python module containing the API implementation")
    name: str = Field(..., description="Name of the API")
    pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
    protocol: str = Field(..., description="Name of the protocol class for the API")
--- a/src/llama-stack-api/llama_stack_api/apis/eval/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/eval/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .eval import *
--- a/src/llama-stack-api/llama_stack_api/apis/files/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/files/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .files import *
--- a/src/llama-stack-api/llama_stack_api/apis/inference/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/inference/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .inference import *
--- a/src/llama-stack-api/llama_stack_api/apis/inspect/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/inspect/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .inspect import *
--- a/src/llama-stack-api/llama_stack_api/apis/models/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/models/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .models import *
--- a/src/llama-stack-api/llama_stack_api/apis/post_training/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/post_training/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .post_training import *
--- a/src/llama-stack-api/llama_stack_api/apis/prompts/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/prompts/init.py
@ -1,9 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .prompts import ListPromptsResponse, Prompt, Prompts
 __all__ = ["Prompt", "Prompts", "ListPromptsResponse"]
--- a/src/llama-stack-api/llama_stack_api/apis/providers/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/providers/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .providers import *
--- a/src/llama-stack-api/llama_stack_api/apis/safety/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/safety/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .safety import *
--- a/src/llama-stack-api/llama_stack_api/apis/scoring/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/scoring/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .scoring import *
--- a/src/llama-stack-api/llama_stack_api/apis/scoring_functions/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/scoring_functions/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .scoring_functions import *
--- a/src/llama-stack-api/llama_stack_api/apis/shields/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/shields/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .shields import *
--- a/src/llama-stack-api/llama_stack_api/apis/tools/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/tools/init.py
@ -1,8 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .rag_tool import *
 from .tools import *
--- a/src/llama-stack-api/llama_stack_api/apis/vector_io/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/vector_io/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .vector_io import *
--- a/src/llama-stack-api/llama_stack_api/apis/vector_stores/init.py
+++ b/src/llama-stack-api/llama_stack_api/apis/vector_stores/init.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .vector_stores import *
--- a/src/llama-stack-api/llama_stack_api/apis/batches/batches.py
+++ b/src/llama-stack-api/llama_stack_api/apis/batches/batches.py
@ -8,8 +8,8 @@ from typing import Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
 try:
    from openai.types import Batch as BatchObject
--- a/src/llama-stack-api/llama_stack_api/apis/benchmarks/benchmarks.py
+++ b/src/llama-stack-api/llama_stack_api/apis/benchmarks/benchmarks.py
@ -7,9 +7,9 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
-from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1ALPHA
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
 class CommonBenchmarkFields(BaseModel):
--- a/src/llama-stack-api/llama_stack_api/common/init.py
+++ b/src/llama-stack-api/llama_stack_api/common/init.py
--- a/src/llama-stack-api/llama_stack_api/apis/common/content_types.py
+++ b/src/llama-stack-api/llama_stack_api/apis/common/content_types.py
--- a/src/llama-stack-api/llama_stack_api/apis/common/errors.py
+++ b/src/llama-stack-api/llama_stack_api/apis/common/errors.py
--- a/src/llama-stack-api/llama_stack_api/apis/common/job_types.py
+++ b/src/llama-stack-api/llama_stack_api/apis/common/job_types.py
--- a/src/llama-stack-api/llama_stack_api/apis/common/responses.py
+++ b/src/llama-stack-api/llama_stack_api/apis/common/responses.py
--- a/src/llama-stack-api/llama_stack_api/apis/common/tracing.py
+++ b/src/llama-stack-api/llama_stack_api/apis/common/tracing.py
--- a/src/llama-stack-api/llama_stack_api/apis/common/training_types.py
+++ b/src/llama-stack-api/llama_stack_api/apis/common/training_types.py
--- a/src/llama-stack-api/llama_stack_api/apis/common/type_system.py
+++ b/src/llama-stack-api/llama_stack_api/apis/common/type_system.py
--- a/src/llama-stack-api/llama_stack_api/apis/conversations/conversations.py
+++ b/src/llama-stack-api/llama_stack_api/apis/conversations/conversations.py
@ -9,7 +9,8 @@ from typing import Annotated, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
-from llama_stack_api.apis.agents.openai_responses import (
+from llama_stack_api.common.tracing import telemetry_traceable
 from llama_stack_api.openai_responses import (
    OpenAIResponseInputFunctionToolCallOutput,
    OpenAIResponseMCPApprovalRequest,
    OpenAIResponseMCPApprovalResponse,
@ -20,9 +21,8 @@ from llama_stack_api.apis.agents.openai_responses import (
    OpenAIResponseOutputMessageMCPListTools,
    OpenAIResponseOutputMessageWebSearchToolCall,
 )
 from llama_stack_api.apis.common.tracing import telemetry_traceable
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1
 from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
 Metadata = dict[str, str]
--- a/src/llama-stack-api/llama_stack_api/apis/datasetio/datasetio.py
+++ b/src/llama-stack-api/llama_stack_api/apis/datasetio/datasetio.py
@ -6,10 +6,10 @@
 from typing import Any, Protocol, runtime_checkable
-from llama_stack_api.apis.common.responses import PaginatedResponse
+from llama_stack_api.common.responses import PaginatedResponse
-from llama_stack_api.apis.datasets import Dataset
+from llama_stack_api.datasets import Dataset
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1BETA
 from llama_stack_api.schema_utils import webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1BETA
 class DatasetStore(Protocol):
--- a/src/llama-stack-api/llama_stack_api/apis/datasets/datasets.py
+++ b/src/llama-stack-api/llama_stack_api/apis/datasets/datasets.py
@ -9,9 +9,9 @@ from typing import Annotated, Any, Literal, Protocol
 from pydantic import BaseModel, Field
-from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1BETA
 from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1BETA
 class DatasetPurpose(StrEnum):
--- a/src/llama-stack-api/llama_stack_api/providers/datatypes.py
+++ b/src/llama-stack-api/llama_stack_api/providers/datatypes.py
@ -4,21 +4,172 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from enum import StrEnum
+from enum import Enum, EnumMeta, StrEnum
 from typing import Any, Protocol
 from urllib.parse import urlparse
 from pydantic import BaseModel, Field
-from llama_stack_api.apis.benchmarks import Benchmark
+from llama_stack_api.benchmarks import Benchmark
-from llama_stack_api.apis.datasets import Dataset
+from llama_stack_api.datasets import Dataset
-from llama_stack_api.apis.datatypes import Api
+from llama_stack_api.models import Model
 from llama_stack_api.apis.models import Model
 from llama_stack_api.apis.scoring_functions import ScoringFn
 from llama_stack_api.apis.shields import Shield
 from llama_stack_api.apis.tools import ToolGroup
 from llama_stack_api.apis.vector_stores import VectorStore
 from llama_stack_api.schema_utils import json_schema_type
 from llama_stack_api.scoring_functions import ScoringFn
 from llama_stack_api.shields import Shield
 from llama_stack_api.tools import ToolGroup
 from llama_stack_api.vector_stores import VectorStore
 class DynamicApiMeta(EnumMeta):
    """Enum metaclass that allows members to be registered after class creation.
    Members registered through ``add()`` are recorded in ``_dynamic_values`` (keyed by
    value) and are also inserted into the enum's ``_member_map_`` / ``_member_names_``.
    """
    def __new__(cls, name, bases, namespace):
        """Create the enum class and attach the bookkeeping attributes used by ``add()``."""
        # Snapshot the non-underscore entries of the class body before EnumMeta consumes them
        original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}
        # Create the enum class
        cls = super().__new__(cls, name, bases, namespace)
        # Store the original values for reference
        cls._original_values = original_values
        # Initialize _dynamic_values
        cls._dynamic_values = {}
        return cls
    def __call__(cls, value):
        """Look up a member by value, falling back to dynamically added members.
        :param value: value (or a spelling that normalizes to a member name) to look up
        :returns: the matching static or dynamically added member
        :raises ValueError: if nothing matches; new APIs must be registered with ``add()``
        """
        try:
            return super().__call__(value)
        except ValueError as e:
            # Only strings can identify a dynamic member. Any other value (an int, an
            # unhashable object, ...) falls through to the ValueError below instead of
            # failing inside the dict lookup or ``.lower()``.
            if isinstance(value, str):
                # If this value was already dynamically added, return it
                if value in cls._dynamic_values:
                    return cls._dynamic_values[value]
                # Accept spellings that normalize to an existing member name
                member_name = value.lower().replace("-", "_")
                if member_name in cls._member_map_:
                    return cls._member_map_[member_name]
            # Never create members implicitly: raise ValueError to force users to use
            # Api.add() to register new APIs explicitly
            raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e
    def __iter__(cls):
        """Iterate over static and dynamically added members, each exactly once."""
        yield from super().__iter__()
        # add() also appends to _member_names_, so the base iterator above already
        # yields those members; only emit dynamic members it could not have produced.
        registered = cls._member_map_
        for member in getattr(cls, "_dynamic_values", {}).values():
            if registered.get(member._name_) is not member:
                yield member
    def add(cls, value):
        """
        Add a new API to the enum.
        Used to register external APIs.
        :param value: value of the new member; its name is the value lower-cased with "-" replaced by "_"
        :returns: the newly created member, or the existing member that already has that name
        """
        member_name = value.lower().replace("-", "_")
        # If this member name already exists in the enum, return it
        if member_name in cls._member_map_:
            return cls._member_map_[member_name]
        # Create a new enum member
        member = object.__new__(cls)
        member._name_ = member_name
        member._value_ = value
        # Add it to the enum class
        cls._member_map_[member_name] = member
        cls._member_names_.append(member_name)
        # NOTE(review): marks the enum's member type as str; the reason is not visible here
        cls._member_type_ = str
        # Store it in our dynamic values
        cls._dynamic_values[value] = member
        return member
@json_schema_type
 class Api(Enum, metaclass=DynamicApiMeta):
    """Enumeration of all available APIs in the Llama Stack system.
    :cvar providers: Provider management and configuration
    :cvar inference: Text generation, chat completions, and embeddings
    :cvar safety: Content moderation and safety shields
    :cvar agents: Agent orchestration and execution
    :cvar batches: Batch processing for asynchronous API requests
    :cvar vector_io: Vector database operations and queries
    :cvar datasetio: Dataset input/output operations
    :cvar scoring: Model output evaluation and scoring
    :cvar eval: Model evaluation and benchmarking framework
    :cvar post_training: Fine-tuning and model training
    :cvar tool_runtime: Tool execution and management
    :cvar models: Model metadata and management
    :cvar shields: Safety shield implementations
    :cvar vector_stores: Vector stores; only used for the routing table
    :cvar datasets: Dataset creation and management
    :cvar scoring_functions: Scoring function definitions
    :cvar benchmarks: Benchmark suite management
    :cvar tool_groups: Tool group organization
    :cvar files: File storage and management
    :cvar prompts: Prompt versions and management
    :cvar conversations: Conversations and their items
    :cvar inspect: Built-in system inspection and introspection
    """
    providers = "providers"
    inference = "inference"
    safety = "safety"
    agents = "agents"
    batches = "batches"
    vector_io = "vector_io"
    datasetio = "datasetio"
    scoring = "scoring"
    eval = "eval"
    post_training = "post_training"
    tool_runtime = "tool_runtime"
    models = "models"
    shields = "shields"
    vector_stores = "vector_stores"  # only used for routing table
    datasets = "datasets"
    scoring_functions = "scoring_functions"
    benchmarks = "benchmarks"
    tool_groups = "tool_groups"
    files = "files"
    prompts = "prompts"
    conversations = "conversations"
    # built-in API
    inspect = "inspect"
@json_schema_type
 class Error(BaseModel):
    """
    Error response from the API. Roughly follows RFC 7807.
    :param status: HTTP status code
    :param title: Error title, a short summary of the error which is invariant for an error type
    :param detail: Error detail, a longer human-readable description of the error
    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
    """
    # Only `instance` declares a default; status, title and detail have none.
    status: int
    title: str
    detail: str
    instance: str | None = None
 class ExternalApiSpec(BaseModel):
    """Specification for an external API implementation."""
    # module, name and protocol are declared with Field(...); pip_packages is the only
    # field with a default value.
    module: str = Field(..., description="Python module containing the API implementation")
    name: str = Field(..., description="Name of the API")
    # NOTE(review): list literal used as the default — presumably safe because pydantic
    # copies field defaults per instance; confirm, or prefer default_factory=list.
    pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
    protocol: str = Field(..., description="Name of the protocol class for the API")
 # Provider-related types (merged from providers/datatypes.py)
 # NOTE: These imports are forward references to avoid circular dependencies
 # They will be resolved at runtime when the classes are used
 class ModelsProtocolPrivate(Protocol):
--- a/src/llama-stack-api/llama_stack_api/apis/eval/eval.py
+++ b/src/llama-stack-api/llama_stack_api/apis/eval/eval.py
@ -8,12 +8,12 @@ from typing import Any, Literal, Protocol
 from pydantic import BaseModel, Field
-from llama_stack_api.apis.common.job_types import Job
+from llama_stack_api.common.job_types import Job
-from llama_stack_api.apis.inference import SamplingParams, SystemMessage
+from llama_stack_api.inference import SamplingParams, SystemMessage
 from llama_stack_api.apis.scoring import ScoringResult
 from llama_stack_api.apis.scoring_functions import ScoringFnParams
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1ALPHA
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.scoring import ScoringResult
 from llama_stack_api.scoring_functions import ScoringFnParams
 from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/apis/files/files.py
+++ b/src/llama-stack-api/llama_stack_api/apis/files/files.py
@ -10,10 +10,10 @@ from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable
 from fastapi import File, Form, Response, UploadFile
 from pydantic import BaseModel, Field
-from llama_stack_api.apis.common.responses import Order
+from llama_stack_api.common.responses import Order
-from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
 # OpenAI Files API Models
--- a/src/llama-stack-api/llama_stack_api/apis/inference/inference.py
+++ b/src/llama-stack-api/llama_stack_api/apis/inference/inference.py
@ -18,14 +18,14 @@ from fastapi import Body
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict
-from llama_stack_api.apis.common.content_types import InterleavedContent
+from llama_stack_api.common.content_types import InterleavedContent
-from llama_stack_api.apis.common.responses import (
+from llama_stack_api.common.responses import (
    Order,
 )
-from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack_api.apis.models import Model
+from llama_stack_api.models import Model
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/apis/inspect/inspect.py
+++ b/src/llama-stack-api/llama_stack_api/apis/inspect/inspect.py
@ -8,11 +8,11 @@ from typing import Literal, Protocol, runtime_checkable
 from pydantic import BaseModel
-from llama_stack_api.apis.version import (
+from llama_stack_api.datatypes import HealthStatus
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import (
    LLAMA_STACK_API_V1,
 )
 from llama_stack_api.providers.datatypes import HealthStatus
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 # Valid values for the route filter parameter.
 # Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated)
--- a/src/llama-stack-api/llama_stack_api/apis/models/models.py
+++ b/src/llama-stack-api/llama_stack_api/apis/models/models.py
@ -9,10 +9,10 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, ConfigDict, Field, field_validator
-from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
 class CommonModelFields(BaseModel):
--- a/src/llama-stack-api/llama_stack_api/apis/agents/openai_responses.py
+++ b/src/llama-stack-api/llama_stack_api/apis/agents/openai_responses.py
@ -10,8 +10,8 @@ from typing import Annotated, Any, Literal
 from pydantic import BaseModel, Field, model_validator
 from typing_extensions import TypedDict
 from llama_stack_api.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
 from llama_stack_api.schema_utils import json_schema_type, register_schema
 from llama_stack_api.vector_io import SearchRankingOptions as FileSearchRankingOptions
 # NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably
 # take their YAML and generate this file automatically. Their YAML is available.
--- a/src/llama-stack-api/llama_stack_api/apis/post_training/post_training.py
+++ b/src/llama-stack-api/llama_stack_api/apis/post_training/post_training.py
@ -10,11 +10,11 @@ from typing import Annotated, Any, Literal, Protocol
 from pydantic import BaseModel, Field
-from llama_stack_api.apis.common.content_types import URL
+from llama_stack_api.common.content_types import URL
-from llama_stack_api.apis.common.job_types import JobStatus
+from llama_stack_api.common.job_types import JobStatus
-from llama_stack_api.apis.common.training_types import Checkpoint
+from llama_stack_api.common.training_types import Checkpoint
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1ALPHA
 from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/apis/prompts/prompts.py
+++ b/src/llama-stack-api/llama_stack_api/apis/prompts/prompts.py
@ -10,9 +10,9 @@ from typing import Protocol, runtime_checkable
 from pydantic import BaseModel, Field, field_validator, model_validator
-from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/apis/providers/providers.py
+++ b/src/llama-stack-api/llama_stack_api/apis/providers/providers.py
@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable
 from pydantic import BaseModel
-from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.datatypes import HealthResponse
 from llama_stack_api.providers.datatypes import HealthResponse
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/providers/__init__.py
+++ b/src/llama-stack-api/llama_stack_api/providers/__init__.py
@ -1,7 +0,0 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from .datatypes import *  # noqa: F403
--- a/src/llama-stack-api/llama_stack_api/apis/tools/rag_tool.py
+++ b/src/llama-stack-api/llama_stack_api/apis/tools/rag_tool.py
@ -9,7 +9,7 @@ from typing import Annotated, Any, Literal
 from pydantic import BaseModel, Field, field_validator
-from llama_stack_api.apis.common.content_types import URL, InterleavedContent
+from llama_stack_api.common.content_types import URL, InterleavedContent
 class RRFRanker(BaseModel):
--- a/src/llama-stack-api/llama_stack_api/apis/resource.py
+++ b/src/llama-stack-api/llama_stack_api/apis/resource.py
--- a/src/llama-stack-api/llama_stack_api/apis/safety/safety.py
+++ b/src/llama-stack-api/llama_stack_api/apis/safety/safety.py
@ -9,11 +9,11 @@ from typing import Any, Protocol, runtime_checkable
 from pydantic import BaseModel, Field
-from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack_api.apis.inference import OpenAIMessageParam
+from llama_stack_api.inference import OpenAIMessageParam
 from llama_stack_api.apis.shields import Shield
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.shields import Shield
 from llama_stack_api.version import LLAMA_STACK_API_V1
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/apis/scoring/scoring.py
+++ b/src/llama-stack-api/llama_stack_api/apis/scoring/scoring.py
@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable
 from pydantic import BaseModel
 from llama_stack_api.apis.scoring_functions import ScoringFn, ScoringFnParams
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams
 from llama_stack_api.version import LLAMA_STACK_API_V1
 # mapping of metric to value
 ScoringResultRow = dict[str, Any]
--- a/src/llama-stack-api/llama_stack_api/apis/scoring_functions/scoring_functions.py
+++ b/src/llama-stack-api/llama_stack_api/apis/scoring_functions/scoring_functions.py
@ -16,10 +16,10 @@ from typing import (
 from pydantic import BaseModel, Field
-from llama_stack_api.apis.common.type_system import ParamType
+from llama_stack_api.common.type_system import ParamType
-from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1
 from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
 # Perhaps more structure can be imposed on these functions. Maybe they could be associated
--- a/src/llama-stack-api/llama_stack_api/apis/shields/shields.py
+++ b/src/llama-stack-api/llama_stack_api/apis/shields/shields.py
@ -8,10 +8,10 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel
-from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
 class CommonShieldFields(BaseModel):
--- a/src/llama-stack-api/llama_stack_api/apis/tools/tools.py
+++ b/src/llama-stack-api/llama_stack_api/apis/tools/tools.py
@ -10,11 +10,11 @@ from typing import Any, Literal, Protocol
 from pydantic import BaseModel
 from typing_extensions import runtime_checkable
-from llama_stack_api.apis.common.content_types import URL, InterleavedContent
+from llama_stack_api.common.content_types import URL, InterleavedContent
-from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/apis/vector_io/vector_io.py
+++ b/src/llama-stack-api/llama_stack_api/apis/vector_io/vector_io.py
@ -13,12 +13,12 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
 from fastapi import Body, Query
 from pydantic import BaseModel, Field
-from llama_stack_api.apis.common.tracing import telemetry_traceable
+from llama_stack_api.common.tracing import telemetry_traceable
-from llama_stack_api.apis.inference import InterleavedContent
+from llama_stack_api.inference import InterleavedContent
 from llama_stack_api.apis.vector_stores import VectorStore
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.strong_typing.schema import register_schema
 from llama_stack_api.vector_stores import VectorStore
 from llama_stack_api.version import LLAMA_STACK_API_V1
@json_schema_type
--- a/src/llama-stack-api/llama_stack_api/apis/vector_stores/vector_stores.py
+++ b/src/llama-stack-api/llama_stack_api/apis/vector_stores/vector_stores.py
@ -8,7 +8,7 @@ from typing import Literal
 from pydantic import BaseModel
-from llama_stack_api.apis.resource import Resource, ResourceType
+from llama_stack_api.resource import Resource, ResourceType
 # Internal resource type for storing the vector store routing and other information
--- a/src/llama-stack-api/llama_stack_api/apis/version.py
+++ b/src/llama-stack-api/llama_stack_api/apis/version.py
--- a/src/llama_stack/cli/stack/_list_deps.py
+++ b/src/llama_stack/cli/stack/_list_deps.py
@ -9,7 +9,7 @@ import sys
 from pathlib import Path
 import yaml
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.datatypes import Api
 from termcolor import cprint
 from llama_stack.cli.stack.utils import ImageType
--- a/src/llama_stack/cli/stack/utils.py
+++ b/src/llama_stack/cli/stack/utils.py
@ -11,7 +11,7 @@ from functools import lru_cache
 from pathlib import Path
 import yaml
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.datatypes import Api
 from termcolor import cprint
 from llama_stack.core.datatypes import (
--- a/src/llama_stack/core/build.py
+++ b/src/llama_stack/core/build.py
@ -6,7 +6,7 @@
 import sys
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.datatypes import Api
 from pydantic import BaseModel
 from termcolor import cprint
--- a/src/llama_stack/core/client.py
+++ b/src/llama_stack/core/client.py
@ -12,7 +12,7 @@ from enum import Enum
 from typing import Any, Union, get_args, get_origin
 import httpx
-from llama_stack_api.providers.datatypes import RemoteProviderConfig
+from llama_stack_api.datatypes import RemoteProviderConfig
 from pydantic import BaseModel, parse_obj_as
 from termcolor import cprint
--- a/src/llama_stack/core/configure.py
+++ b/src/llama_stack/core/configure.py
@ -6,7 +6,7 @@
 import textwrap
 from typing import Any
-from llama_stack_api.providers.datatypes import Api, ProviderSpec
+from llama_stack_api.datatypes import Api, ProviderSpec
 from llama_stack.core.datatypes import (
    LLAMA_STACK_RUN_CONFIG_VERSION,
--- a/src/llama_stack/core/conversations/conversations.py
+++ b/src/llama_stack/core/conversations/conversations.py
@ -8,7 +8,7 @@ import secrets
 import time
 from typing import Any, Literal
-from llama_stack_api.apis.conversations.conversations import (
+from llama_stack_api.conversations import (
    Conversation,
    ConversationDeletedResource,
    ConversationItem,
--- a/src/llama_stack/core/datatypes.py
+++ b/src/llama_stack/core/datatypes.py
@ -9,21 +9,21 @@ from pathlib import Path
 from typing import Annotated, Any, Literal, Self
 from urllib.parse import urlparse
-from llama_stack_api.apis.benchmarks import Benchmark, BenchmarkInput
+from llama_stack_api.benchmarks import Benchmark, BenchmarkInput
-from llama_stack_api.apis.datasetio import DatasetIO
+from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Dataset, DatasetInput
+from llama_stack_api.datasets import Dataset, DatasetInput
-from llama_stack_api.apis.eval import Eval
+from llama_stack_api.datatypes import Api, ProviderSpec
-from llama_stack_api.apis.inference import Inference
+from llama_stack_api.eval import Eval
-from llama_stack_api.apis.models import Model, ModelInput
+from llama_stack_api.inference import Inference
-from llama_stack_api.apis.resource import Resource
+from llama_stack_api.models import Model, ModelInput
-from llama_stack_api.apis.safety import Safety
+from llama_stack_api.resource import Resource
-from llama_stack_api.apis.scoring import Scoring
+from llama_stack_api.safety import Safety
-from llama_stack_api.apis.scoring_functions import ScoringFn, ScoringFnInput
+from llama_stack_api.scoring import Scoring
-from llama_stack_api.apis.shields import Shield, ShieldInput
+from llama_stack_api.scoring_functions import ScoringFn, ScoringFnInput
-from llama_stack_api.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
+from llama_stack_api.shields import Shield, ShieldInput
-from llama_stack_api.apis.vector_io import VectorIO
+from llama_stack_api.tools import ToolGroup, ToolGroupInput, ToolRuntime
-from llama_stack_api.apis.vector_stores import VectorStore, VectorStoreInput
+from llama_stack_api.vector_io import VectorIO
-from llama_stack_api.providers.datatypes import Api, ProviderSpec
+from llama_stack_api.vector_stores import VectorStore, VectorStoreInput
 from pydantic import BaseModel, Field, field_validator, model_validator
 from llama_stack.core.access_control.datatypes import AccessRule
--- a/src/llama_stack/core/distribution.py
+++ b/src/llama_stack/core/distribution.py
@ -10,7 +10,7 @@ import os
 from typing import Any
 import yaml
-from llama_stack_api.providers.datatypes import (
+from llama_stack_api.datatypes import (
    Api,
    InlineProviderSpec,
    ProviderSpec,
--- a/src/llama_stack/core/external.py
+++ b/src/llama_stack/core/external.py
@ -6,7 +6,7 @@
 import yaml
-from llama_stack_api.apis.datatypes import Api, ExternalApiSpec
+from llama_stack_api.datatypes import Api, ExternalApiSpec
 from llama_stack.core.datatypes import BuildConfig, StackRunConfig
 from llama_stack.log import get_logger
--- a/src/llama_stack/core/inspect.py
+++ b/src/llama_stack/core/inspect.py
@ -6,14 +6,14 @@
 from importlib.metadata import version
-from llama_stack_api.apis.inspect import (
+from llama_stack_api.datatypes import HealthStatus
 from llama_stack_api.inspect import (
    HealthInfo,
    Inspect,
    ListRoutesResponse,
    RouteInfo,
    VersionInfo,
 )
 from llama_stack_api.providers.datatypes import HealthStatus
 from pydantic import BaseModel
 from llama_stack.core.datatypes import StackRunConfig
--- a/src/llama_stack/core/prompts/prompts.py
+++ b/src/llama_stack/core/prompts/prompts.py
@ -7,7 +7,7 @@
 import json
 from typing import Any
-from llama_stack_api.apis.prompts import ListPromptsResponse, Prompt, Prompts
+from llama_stack_api.prompts import ListPromptsResponse, Prompt, Prompts
 from pydantic import BaseModel
 from llama_stack.core.datatypes import StackRunConfig
--- a/src/llama_stack/core/providers.py
+++ b/src/llama_stack/core/providers.py
@ -7,8 +7,8 @@
 import asyncio
 from typing import Any
-from llama_stack_api.apis.providers import ListProvidersResponse, ProviderInfo, Providers
+from llama_stack_api.datatypes import HealthResponse, HealthStatus
-from llama_stack_api.providers.datatypes import HealthResponse, HealthStatus
+from llama_stack_api.providers import ListProvidersResponse, ProviderInfo, Providers
 from pydantic import BaseModel
 from llama_stack.log import get_logger
--- a/src/llama_stack/core/resolver.py
+++ b/src/llama_stack/core/resolver.py
@ -8,33 +8,17 @@ import importlib.metadata
 import inspect
 from typing import Any
-from llama_stack_api.apis.agents import Agents
+from llama_stack_api.agents import Agents
-from llama_stack_api.apis.batches import Batches
+from llama_stack_api.batches import Batches
-from llama_stack_api.apis.benchmarks import Benchmarks
+from llama_stack_api.benchmarks import Benchmarks
-from llama_stack_api.apis.conversations import Conversations
+from llama_stack_api.conversations import Conversations
-from llama_stack_api.apis.datasetio import DatasetIO
+from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
+from llama_stack_api.datasets import Datasets
-from llama_stack_api.apis.datatypes import ExternalApiSpec
+from llama_stack_api.datatypes import (
 from llama_stack_api.apis.eval import Eval
 from llama_stack_api.apis.files import Files
 from llama_stack_api.apis.inference import Inference, InferenceProvider
 from llama_stack_api.apis.inspect import Inspect
 from llama_stack_api.apis.models import Models
 from llama_stack_api.apis.post_training import PostTraining
 from llama_stack_api.apis.prompts import Prompts
 from llama_stack_api.apis.providers import Providers as ProvidersAPI
 from llama_stack_api.apis.safety import Safety
 from llama_stack_api.apis.scoring import Scoring
 from llama_stack_api.apis.scoring_functions import ScoringFunctions
 from llama_stack_api.apis.shields import Shields
 from llama_stack_api.apis.tools import ToolGroups, ToolRuntime
 from llama_stack_api.apis.vector_io import VectorIO
 from llama_stack_api.apis.vector_stores import VectorStore
 from llama_stack_api.apis.version import LLAMA_STACK_API_V1ALPHA
 from llama_stack_api.providers.datatypes import (
    Api,
    BenchmarksProtocolPrivate,
    DatasetsProtocolPrivate,
    ExternalApiSpec,
    ModelsProtocolPrivate,
    ProviderSpec,
    RemoteProviderConfig,
@ -43,6 +27,22 @@ from llama_stack_api.providers.datatypes import (
    ShieldsProtocolPrivate,
    ToolGroupsProtocolPrivate,
 )
 from llama_stack_api.eval import Eval
 from llama_stack_api.files import Files
 from llama_stack_api.inference import Inference, InferenceProvider
 from llama_stack_api.inspect import Inspect
 from llama_stack_api.models import Models
 from llama_stack_api.post_training import PostTraining
 from llama_stack_api.prompts import Prompts
 from llama_stack_api.providers import Providers as ProvidersAPI
 from llama_stack_api.safety import Safety
 from llama_stack_api.scoring import Scoring
 from llama_stack_api.scoring_functions import ScoringFunctions
 from llama_stack_api.shields import Shields
 from llama_stack_api.tools import ToolGroups, ToolRuntime
 from llama_stack_api.vector_io import VectorIO
 from llama_stack_api.vector_stores import VectorStore
 from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
 from llama_stack.core.client import get_client_impl
 from llama_stack.core.datatypes import (
--- a/src/llama_stack/core/routers/__init__.py
+++ b/src/llama_stack/core/routers/__init__.py
@ -6,7 +6,7 @@
 from typing import Any
-from llama_stack_api.providers.datatypes import Api, RoutingTable
+from llama_stack_api.datatypes import Api, RoutingTable
 from llama_stack.core.datatypes import (
    AccessRule,
--- a/src/llama_stack/core/routers/datasets.py
+++ b/src/llama_stack/core/routers/datasets.py
@ -6,10 +6,10 @@
 from typing import Any
-from llama_stack_api.apis.common.responses import PaginatedResponse
+from llama_stack_api.common.responses import PaginatedResponse
-from llama_stack_api.apis.datasetio import DatasetIO
+from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import DatasetPurpose, DataSource
+from llama_stack_api.datasets import DatasetPurpose, DataSource
-from llama_stack_api.providers.datatypes import RoutingTable
+from llama_stack_api.datatypes import RoutingTable
 from llama_stack.log import get_logger
--- a/src/llama_stack/core/routers/eval_scoring.py
+++ b/src/llama_stack/core/routers/eval_scoring.py
@ -6,14 +6,14 @@
 from typing import Any
-from llama_stack_api.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job
+from llama_stack_api.datatypes import RoutingTable
-from llama_stack_api.apis.scoring import (
+from llama_stack_api.eval import BenchmarkConfig, Eval, EvaluateResponse, Job
 from llama_stack_api.scoring import (
    ScoreBatchResponse,
    ScoreResponse,
    Scoring,
    ScoringFnParams,
 )
 from llama_stack_api.providers.datatypes import RoutingTable
 from llama_stack.log import get_logger
--- a/src/llama_stack/core/routers/inference.py
+++ b/src/llama_stack/core/routers/inference.py
@ -11,13 +11,16 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 from fastapi import Body
-from llama_stack_api.apis.common.errors import ModelNotFoundError, ModelTypeError
+from llama_stack_api.common.errors import ModelNotFoundError, ModelTypeError
-from llama_stack_api.apis.inference import (
+from llama_stack_api.datatypes import HealthResponse, HealthStatus, RoutingTable
 from llama_stack_api.inference import (
    Inference,
    ListOpenAIChatCompletionResponse,
    OpenAIAssistantMessageParam,
    OpenAIChatCompletion,
    OpenAIChatCompletionChunk,
    OpenAIChatCompletionContentPartImageParam,
    OpenAIChatCompletionContentPartTextParam,
    OpenAIChatCompletionRequestWithExtraBody,
    OpenAIChatCompletionToolCall,
    OpenAIChatCompletionToolCallFunction,
@ -32,12 +35,7 @@ from llama_stack_api.apis.inference import (
    Order,
    RerankResponse,
 )
-from llama_stack_api.apis.inference.inference import (
+from llama_stack_api.models import ModelType
    OpenAIChatCompletionContentPartImageParam,
    OpenAIChatCompletionContentPartTextParam,
 )
 from llama_stack_api.apis.models import ModelType
 from llama_stack_api.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
 from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
 from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
 from pydantic import TypeAdapter
--- a/src/llama_stack/core/routers/safety.py
+++ b/src/llama_stack/core/routers/safety.py
@ -6,11 +6,10 @@
 from typing import Any
-from llama_stack_api.apis.inference import OpenAIMessageParam
+from llama_stack_api.datatypes import RoutingTable
-from llama_stack_api.apis.safety import RunShieldResponse, Safety
+from llama_stack_api.inference import OpenAIMessageParam
-from llama_stack_api.apis.safety.safety import ModerationObject
+from llama_stack_api.safety import ModerationObject, RunShieldResponse, Safety
-from llama_stack_api.apis.shields import Shield
+from llama_stack_api.shields import Shield
 from llama_stack_api.providers.datatypes import RoutingTable
 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.log import get_logger
--- a/src/llama_stack/core/routers/tool_runtime.py
+++ b/src/llama_stack/core/routers/tool_runtime.py
@ -6,10 +6,10 @@
 from typing import Any
-from llama_stack_api.apis.common.content_types import (
+from llama_stack_api.common.content_types import (
    URL,
 )
-from llama_stack_api.apis.tools import (
+from llama_stack_api.tools import (
    ListToolDefsResponse,
    ToolRuntime,
 )
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@ -9,9 +9,10 @@ import uuid
 from typing import Annotated, Any
 from fastapi import Body
-from llama_stack_api.apis.common.content_types import InterleavedContent
+from llama_stack_api.common.content_types import InterleavedContent
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.datatypes import HealthResponse, HealthStatus, RoutingTable
-from llama_stack_api.apis.vector_io import (
+from llama_stack_api.models import ModelType
 from llama_stack_api.vector_io import (
    Chunk,
    OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
    OpenAICreateVectorStoreRequestWithExtraBody,
@ -32,7 +33,6 @@ from llama_stack_api.apis.vector_io import (
    VectorStoreObject,
    VectorStoreSearchResponsePage,
 )
 from llama_stack_api.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
 from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
--- a/src/llama_stack/core/routing_tables/benchmarks.py
+++ b/src/llama_stack/core/routing_tables/benchmarks.py
@ -6,7 +6,7 @@
 from typing import Any
-from llama_stack_api.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
+from llama_stack_api.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
 from llama_stack.core.datatypes import (
    BenchmarkWithOwner,
--- a/src/llama_stack/core/routing_tables/common.py
+++ b/src/llama_stack/core/routing_tables/common.py
@ -6,10 +6,10 @@
 from typing import Any
-from llama_stack_api.apis.common.errors import ModelNotFoundError
+from llama_stack_api.common.errors import ModelNotFoundError
-from llama_stack_api.apis.models import Model
+from llama_stack_api.datatypes import Api, RoutingTable
-from llama_stack_api.apis.resource import ResourceType
+from llama_stack_api.models import Model
-from llama_stack_api.providers.datatypes import Api, RoutingTable
+from llama_stack_api.resource import ResourceType
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
 from llama_stack.core.access_control.datatypes import Action
--- a/src/llama_stack/core/routing_tables/datasets.py
+++ b/src/llama_stack/core/routing_tables/datasets.py
@ -7,8 +7,8 @@
 import uuid
 from typing import Any
-from llama_stack_api.apis.common.errors import DatasetNotFoundError
+from llama_stack_api.common.errors import DatasetNotFoundError
-from llama_stack_api.apis.datasets import (
+from llama_stack_api.datasets import (
    Dataset,
    DatasetPurpose,
    Datasets,
@ -18,7 +18,7 @@ from llama_stack_api.apis.datasets import (
    RowsDataSource,
    URIDataSource,
 )
-from llama_stack_api.apis.resource import ResourceType
+from llama_stack_api.resource import ResourceType
 from llama_stack.core.datatypes import (
    DatasetWithOwner,
--- a/src/llama_stack/core/routing_tables/models.py
+++ b/src/llama_stack/core/routing_tables/models.py
@ -7,8 +7,8 @@
 import time
 from typing import Any
-from llama_stack_api.apis.common.errors import ModelNotFoundError
+from llama_stack_api.common.errors import ModelNotFoundError
-from llama_stack_api.apis.models import (
+from llama_stack_api.models import (
    ListModelsResponse,
    Model,
    Models,
--- a/src/llama_stack/core/routing_tables/scoring_functions.py
+++ b/src/llama_stack/core/routing_tables/scoring_functions.py
@ -4,9 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import ParamType
+from llama_stack_api.common.type_system import ParamType
-from llama_stack_api.apis.resource import ResourceType
+from llama_stack_api.resource import ResourceType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.scoring_functions import (
    ListScoringFunctionsResponse,
    ScoringFn,
    ScoringFnParams,
--- a/src/llama_stack/core/routing_tables/shields.py
+++ b/src/llama_stack/core/routing_tables/shields.py
@ -6,8 +6,8 @@
 from typing import Any
-from llama_stack_api.apis.resource import ResourceType
+from llama_stack_api.resource import ResourceType
-from llama_stack_api.apis.shields import ListShieldsResponse, Shield, Shields
+from llama_stack_api.shields import ListShieldsResponse, Shield, Shields
 from llama_stack.core.datatypes import (
    ShieldWithOwner,
--- a/src/llama_stack/core/routing_tables/toolgroups.py
+++ b/src/llama_stack/core/routing_tables/toolgroups.py
@ -6,9 +6,9 @@
 from typing import Any
-from llama_stack_api.apis.common.content_types import URL
+from llama_stack_api.common.content_types import URL
-from llama_stack_api.apis.common.errors import ToolGroupNotFoundError
+from llama_stack_api.common.errors import ToolGroupNotFoundError
-from llama_stack_api.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
+from llama_stack_api.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
 from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
 from llama_stack.log import get_logger
--- a/src/llama_stack/core/routing_tables/vector_stores.py
+++ b/src/llama_stack/core/routing_tables/vector_stores.py
@@ -6,12 +6,12 @@
 from typing import Any
-from llama_stack_api.apis.common.errors import ModelNotFoundError, ModelTypeError
+from llama_stack_api.common.errors import ModelNotFoundError, ModelTypeError
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.models import ModelType
-from llama_stack_api.apis.resource import ResourceType
+from llama_stack_api.resource import ResourceType
 # Removed VectorStores import to avoid exposing public API
-from llama_stack_api.apis.vector_io.vector_io import (
+from llama_stack_api.vector_io import (
    SearchRankingOptions,
    VectorStoreChunkingStrategy,
    VectorStoreDeleteResponse,
--- a/src/llama_stack/core/server/auth_providers.py
+++ b/src/llama_stack/core/server/auth_providers.py
@@ -11,7 +11,7 @@ from urllib.parse import parse_qs, urljoin, urlparse
 import httpx
 import jwt
-from llama_stack_api.apis.common.errors import TokenValidationError
+from llama_stack_api.common.errors import TokenValidationError
 from pydantic import BaseModel, Field
 from llama_stack.core.datatypes import (
--- a/src/llama_stack/core/server/routes.py
+++ b/src/llama_stack/core/server/routes.py
@@ -10,7 +10,7 @@ from collections.abc import Callable
 from typing import Any
 from aiohttp import hdrs
-from llama_stack_api.apis.datatypes import Api, ExternalApiSpec
+from llama_stack_api.datatypes import Api, ExternalApiSpec
 from llama_stack_api.schema_utils import WebMethod
 from starlette.routing import Route
--- a/src/llama_stack/core/server/server.py
+++ b/src/llama_stack/core/server/server.py
@@ -28,9 +28,9 @@ from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
-from llama_stack_api.apis.common.errors import ConflictError, ResourceNotFoundError
+from llama_stack_api.common.errors import ConflictError, ResourceNotFoundError
-from llama_stack_api.apis.common.responses import PaginatedResponse
+from llama_stack_api.common.responses import PaginatedResponse
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.datatypes import Api
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
--- a/src/llama_stack/core/stack.py
+++ b/src/llama_stack/core/stack.py
@@ -12,27 +12,27 @@ import tempfile
 from typing import Any
 import yaml
-from llama_stack_api.apis.agents import Agents
+from llama_stack_api.agents import Agents
-from llama_stack_api.apis.batches import Batches
+from llama_stack_api.batches import Batches
-from llama_stack_api.apis.benchmarks import Benchmarks
+from llama_stack_api.benchmarks import Benchmarks
-from llama_stack_api.apis.conversations import Conversations
+from llama_stack_api.conversations import Conversations
-from llama_stack_api.apis.datasetio import DatasetIO
+from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
+from llama_stack_api.datasets import Datasets
-from llama_stack_api.apis.eval import Eval
+from llama_stack_api.datatypes import Api
-from llama_stack_api.apis.files import Files
+from llama_stack_api.eval import Eval
-from llama_stack_api.apis.inference import Inference
+from llama_stack_api.files import Files
-from llama_stack_api.apis.inspect import Inspect
+from llama_stack_api.inference import Inference
-from llama_stack_api.apis.models import Models
+from llama_stack_api.inspect import Inspect
-from llama_stack_api.apis.post_training import PostTraining
+from llama_stack_api.models import Models
-from llama_stack_api.apis.prompts import Prompts
+from llama_stack_api.post_training import PostTraining
-from llama_stack_api.apis.providers import Providers
+from llama_stack_api.prompts import Prompts
-from llama_stack_api.apis.safety import Safety
+from llama_stack_api.providers import Providers
-from llama_stack_api.apis.scoring import Scoring
+from llama_stack_api.safety import Safety
-from llama_stack_api.apis.scoring_functions import ScoringFunctions
+from llama_stack_api.scoring import Scoring
-from llama_stack_api.apis.shields import Shields
+from llama_stack_api.scoring_functions import ScoringFunctions
-from llama_stack_api.apis.tools import ToolGroups, ToolRuntime
+from llama_stack_api.shields import Shields
-from llama_stack_api.apis.vector_io import VectorIO
+from llama_stack_api.tools import ToolGroups, ToolRuntime
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.vector_io import VectorIO
 from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
 from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
--- a/src/llama_stack/distributions/dell/dell.py
+++ b/src/llama_stack/distributions/dell/dell.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.models import ModelType
 from llama_stack.core.datatypes import (
    BuildProvider,
--- a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
+++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
@@ -6,7 +6,7 @@
 from pathlib import Path
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.models import ModelType
 from llama_stack.core.datatypes import (
    BuildProvider,
--- a/Show more
+++ b/Show more