diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx index 2bbb079cf..ec054c7e6 100644 --- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx @@ -153,7 +153,7 @@ description: | Example using RAGQueryConfig with different search modes: ```python - from llama_stack_api.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker + from llama_stack_api.rag_tool import RAGQueryConfig, RRFRanker, WeightedRanker # Vector search config = RAGQueryConfig(mode="vector", max_chunks=5) @@ -358,7 +358,7 @@ Two ranker types are supported: Example using RAGQueryConfig with different search modes: ```python -from llama_stack_api.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker +from llama_stack_api.rag_tool import RAGQueryConfig, RRFRanker, WeightedRanker # Vector search config = RAGQueryConfig(mode="vector", max_chunks=5) diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py index 3fee1091e..0bcdee4bb 100644 --- a/docs/openapi_generator/generate.py +++ b/docs/openapi_generator/generate.py @@ -16,7 +16,7 @@ import sys import fire import ruamel.yaml as yaml -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 # noqa: E402 +from llama_stack_api.version import LLAMA_STACK_API_V1 # noqa: E402 from llama_stack.core.stack import LlamaStack # noqa: E402 from .pyopenapi.options import Options # noqa: E402 diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index cf88d754d..9b5c375d0 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -16,7 +16,7 @@ from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union from fastapi import UploadFile -from llama_stack_api.apis.datatypes import Error +from llama_stack_api.datatypes import Error from llama_stack_api.strong_typing.core import JsonType from llama_stack_api.strong_typing.docstring import Docstring, parse_type from llama_stack_api.strong_typing.inspection import ( diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py index 096c3ee18..65881df43 100644 --- a/docs/openapi_generator/pyopenapi/operations.py +++ b/docs/openapi_generator/pyopenapi/operations.py @@ -11,7 +11,7 @@ import typing from dataclasses import dataclass from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union -from llama_stack_api.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA +from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA from termcolor import colored diff --git a/scripts/generate_prompt_format.py b/scripts/generate_prompt_format.py index 427b22916..033c3d572 100755 --- a/scripts/generate_prompt_format.py +++ b/scripts/generate_prompt_format.py @@ -14,7 +14,7 @@ import os from pathlib import Path import fire -from llama_stack_api.apis.common.errors import ModelNotFoundError +from llama_stack_api.common.errors import ModelNotFoundError from llama_stack.models.llama.llama3.generation import Llama3 from llama_stack.models.llama.llama4.generation import Llama4 diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py index 06bb29797..d62d626ad 100755 --- a/scripts/provider_codegen.py +++ b/scripts/provider_codegen.py @@ -22,7 +22,7 @@ def get_api_docstring(api_name: str) -> str | None: """Extract docstring 
from the API protocol class."""
     try:
         # Import the API module dynamically
-        api_module = __import__(f"llama_stack_api.apis.{api_name}", fromlist=[api_name.title()])
+        api_module = __import__(f"llama_stack_api.{api_name}", fromlist=[api_name.title()])
 
         # Get the main protocol class (usually capitalized API name)
         protocol_class_name = api_name.title()
@@ -83,9 +83,9 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]:
             # this string replace is ridiculous
             field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "")
             field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "")
-            field_type = field_type.replace("llama_stack_api.apis.inference.inference.", "")
+            field_type = field_type.replace("llama_stack_api.inference.", "")
             field_type = field_type.replace("llama_stack.providers.", "")
-            field_type = field_type.replace("llama_stack_api.providers.", "")
+            field_type = field_type.replace("llama_stack_api.datatypes.", "")
 
             default_value = field.default
             if field.default_factory is not None:
diff --git a/src/llama-stack-api/README.md b/src/llama-stack-api/README.md
index d6ca0217b..aa6b05722 100644
--- a/src/llama-stack-api/README.md
+++ b/src/llama-stack-api/README.md
@@ -58,9 +58,9 @@ Current version: **0.1.0**
 ## Usage Example
 
 ```python
-from llama_stack_api.apis.inference import Inference, ChatCompletionRequest
-from llama_stack_api.providers.datatypes import ProviderSpec, InlineProviderSpec
-from llama_stack_api.apis.datatypes import Api
+from llama_stack_api.inference import Inference, ChatCompletionRequest
+from llama_stack_api.datatypes import ProviderSpec, InlineProviderSpec
+from llama_stack_api.datatypes import Api
 
 
 # Use protocol definitions for type checking
diff --git a/src/llama-stack-api/llama_stack_api/__init__.py b/src/llama-stack-api/llama_stack_api/__init__.py
index 2f11fb77e..beff66128 100644
--- a/src/llama-stack-api/llama_stack_api/__init__.py
+++ b/src/llama-stack-api/llama_stack_api/__init__.py
@@ -13,14 +13,15 @@ and clients that need to interact with Llama Stack APIs without requiring the full
 server implementation.
 
 Key components:
-- apis: Protocol definitions for all Llama Stack APIs
-- providers: Provider interface specifications
+- API modules (agents, inference, safety, etc.): Protocol definitions for all Llama Stack APIs
+- datatypes: Core data types and provider specifications
+- common: Common data types used across APIs
 - strong_typing: Type system utilities
 - schema_utils: Schema validation and utilities
 """
 
 __version__ = "0.1.0"
 
-from . import apis, providers, schema_utils, strong_typing  # noqa: F401
+from . import common, datatypes, schema_utils, strong_typing  # noqa: F401
 
-__all__ = ["apis", "providers", "schema_utils", "strong_typing"]
+__all__ = ["common", "datatypes", "schema_utils", "strong_typing"]
diff --git a/src/llama-stack-api/llama_stack_api/apis/agents/agents.py b/src/llama-stack-api/llama_stack_api/agents.py
similarity index 98%
rename from src/llama-stack-api/llama_stack_api/apis/agents/agents.py
rename to src/llama-stack-api/llama_stack_api/agents.py
index 194eec7c4..ca0611746 100644
--- a/src/llama-stack-api/llama_stack_api/apis/agents/agents.py
+++ b/src/llama-stack-api/llama_stack_api/agents.py
@@ -9,9 +9,9 @@ from typing import Annotated, Protocol, runtime_checkable
 
 from pydantic import BaseModel
 
-from llama_stack_api.apis.common.responses import Order
-from llama_stack_api.apis.version import LLAMA_STACK_API_V1
+from llama_stack_api.common.responses import Order
 from llama_stack_api.schema_utils import ExtraBodyField, json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
 
 from .openai_responses import (
     ListOpenAIResponseInputItem,
diff --git a/src/llama-stack-api/llama_stack_api/apis/agents/__init__.py b/src/llama-stack-api/llama_stack_api/apis/agents/__init__.py
deleted file mode 100644
index 6416b283b..000000000
--- a/src/llama-stack-api/llama_stack_api/apis/agents/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .agents import *
diff --git a/src/llama-stack-api/llama_stack_api/apis/batches/__init__.py b/src/llama-stack-api/llama_stack_api/apis/batches/__init__.py
deleted file mode 100644
index 9ce7d3d75..000000000
--- a/src/llama-stack-api/llama_stack_api/apis/batches/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .batches import Batches, BatchObject, ListBatchesResponse
-
-__all__ = ["Batches", "BatchObject", "ListBatchesResponse"]
diff --git a/src/llama-stack-api/llama_stack_api/apis/benchmarks/__init__.py b/src/llama-stack-api/llama_stack_api/apis/benchmarks/__init__.py
deleted file mode 100644
index 62d1b367c..000000000
--- a/src/llama-stack-api/llama_stack_api/apis/benchmarks/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .benchmarks import *
diff --git a/src/llama-stack-api/llama_stack_api/apis/common/__init__.py b/src/llama-stack-api/llama_stack_api/apis/common/__init__.py
deleted file mode 100644
index 756f351d8..000000000
--- a/src/llama-stack-api/llama_stack_api/apis/common/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
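For context, the migration this flattening implies for downstream imports — a minimal before/after sketch inferred from the renames in this patch, not part of the diff itself:

```python
# Before this patch: protocol definitions lived under the nested `apis`
# package, and provider specs under `providers.datatypes`.
from llama_stack_api.apis.inference import Inference
from llama_stack_api.providers.datatypes import ProviderSpec

# After this patch: API modules sit at the package root, and provider
# specs are merged into the top-level `datatypes` module.
from llama_stack_api.inference import Inference
from llama_stack_api.datatypes import ProviderSpec
```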
diff --git a/src/llama-stack-api/llama_stack_api/apis/conversations/__init__.py b/src/llama-stack-api/llama_stack_api/apis/conversations/__init__.py deleted file mode 100644 index b6ddc5999..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/conversations/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .conversations import ( - Conversation, - ConversationDeletedResource, - ConversationItem, - ConversationItemCreateRequest, - ConversationItemDeletedResource, - ConversationItemList, - Conversations, - Metadata, -) - -__all__ = [ - "Conversation", - "ConversationDeletedResource", - "ConversationItem", - "ConversationItemCreateRequest", - "ConversationItemDeletedResource", - "ConversationItemList", - "Conversations", - "Metadata", -] diff --git a/src/llama-stack-api/llama_stack_api/apis/datasetio/__init__.py b/src/llama-stack-api/llama_stack_api/apis/datasetio/__init__.py deleted file mode 100644 index 8c087bfa4..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/datasetio/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .datasetio import * diff --git a/src/llama-stack-api/llama_stack_api/apis/datasets/__init__.py b/src/llama-stack-api/llama_stack_api/apis/datasets/__init__.py deleted file mode 100644 index 9c9a128d2..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/datasets/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .datasets import * diff --git a/src/llama-stack-api/llama_stack_api/apis/datatypes.py b/src/llama-stack-api/llama_stack_api/apis/datatypes.py deleted file mode 100644 index 3f2e93945..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/datatypes.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from enum import Enum, EnumMeta - -from pydantic import BaseModel, Field - -from llama_stack_api.schema_utils import json_schema_type - - -class DynamicApiMeta(EnumMeta): - def __new__(cls, name, bases, namespace): - # Store the original enum values - original_values = {k: v for k, v in namespace.items() if not k.startswith("_")} - - # Create the enum class - cls = super().__new__(cls, name, bases, namespace) - - # Store the original values for reference - cls._original_values = original_values - # Initialize _dynamic_values - cls._dynamic_values = {} - - return cls - - def __call__(cls, value): - try: - return super().__call__(value) - except ValueError as e: - # If this value was already dynamically added, return it - if value in cls._dynamic_values: - return cls._dynamic_values[value] - - # If the value doesn't exist, create a new enum member - # Create a new member name from the value - member_name = value.lower().replace("-", "_") - - # If this member name already exists in the enum, return the existing member - if member_name in cls._member_map_: - return cls._member_map_[member_name] - - # Instead of creating a new member, raise ValueError to force users to use Api.add() to - # register new APIs explicitly - raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e - - def __iter__(cls): - # Allow iteration over both static and dynamic members - yield from super().__iter__() - if hasattr(cls, "_dynamic_values"): - yield from cls._dynamic_values.values() - - def add(cls, value): - """ - Add a new API to the enum. - Used to register external APIs. - """ - member_name = value.lower().replace("-", "_") - - # If this member name already exists in the enum, return it - if member_name in cls._member_map_: - return cls._member_map_[member_name] - - # Create a new enum member - member = object.__new__(cls) - member._name_ = member_name - member._value_ = value - - # Add it to the enum class - cls._member_map_[member_name] = member - cls._member_names_.append(member_name) - cls._member_type_ = str - - # Store it in our dynamic values - cls._dynamic_values[value] = member - - return member - - -@json_schema_type -class Api(Enum, metaclass=DynamicApiMeta): - """Enumeration of all available APIs in the Llama Stack system. 
- :cvar providers: Provider management and configuration - :cvar inference: Text generation, chat completions, and embeddings - :cvar safety: Content moderation and safety shields - :cvar agents: Agent orchestration and execution - :cvar batches: Batch processing for asynchronous API requests - :cvar vector_io: Vector database operations and queries - :cvar datasetio: Dataset input/output operations - :cvar scoring: Model output evaluation and scoring - :cvar eval: Model evaluation and benchmarking framework - :cvar post_training: Fine-tuning and model training - :cvar tool_runtime: Tool execution and management - :cvar telemetry: Observability and system monitoring - :cvar models: Model metadata and management - :cvar shields: Safety shield implementations - :cvar datasets: Dataset creation and management - :cvar scoring_functions: Scoring function definitions - :cvar benchmarks: Benchmark suite management - :cvar tool_groups: Tool group organization - :cvar files: File storage and management - :cvar prompts: Prompt versions and management - :cvar inspect: Built-in system inspection and introspection - """ - - providers = "providers" - inference = "inference" - safety = "safety" - agents = "agents" - batches = "batches" - vector_io = "vector_io" - datasetio = "datasetio" - scoring = "scoring" - eval = "eval" - post_training = "post_training" - tool_runtime = "tool_runtime" - - models = "models" - shields = "shields" - vector_stores = "vector_stores" # only used for routing table - datasets = "datasets" - scoring_functions = "scoring_functions" - benchmarks = "benchmarks" - tool_groups = "tool_groups" - files = "files" - prompts = "prompts" - conversations = "conversations" - - # built-in API - inspect = "inspect" - - -@json_schema_type -class Error(BaseModel): - """ - Error response from the API. Roughly follows RFC 7807. - - :param status: HTTP status code - :param title: Error title, a short summary of the error which is invariant for an error type - :param detail: Error detail, a longer human-readable description of the error - :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error - """ - - status: int - title: str - detail: str - instance: str | None = None - - -class ExternalApiSpec(BaseModel): - """Specification for an external API implementation.""" - - module: str = Field(..., description="Python module containing the API implementation") - name: str = Field(..., description="Name of the API") - pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API") - protocol: str = Field(..., description="Name of the protocol class for the API") diff --git a/src/llama-stack-api/llama_stack_api/apis/eval/__init__.py b/src/llama-stack-api/llama_stack_api/apis/eval/__init__.py deleted file mode 100644 index 28a1d6049..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/eval/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .eval import * diff --git a/src/llama-stack-api/llama_stack_api/apis/files/__init__.py b/src/llama-stack-api/llama_stack_api/apis/files/__init__.py deleted file mode 100644 index 189e4de19..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/files/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. 
-# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .files import * diff --git a/src/llama-stack-api/llama_stack_api/apis/inference/__init__.py b/src/llama-stack-api/llama_stack_api/apis/inference/__init__.py deleted file mode 100644 index f0c8783c1..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/inference/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .inference import * diff --git a/src/llama-stack-api/llama_stack_api/apis/inspect/__init__.py b/src/llama-stack-api/llama_stack_api/apis/inspect/__init__.py deleted file mode 100644 index 016937e3d..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/inspect/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .inspect import * diff --git a/src/llama-stack-api/llama_stack_api/apis/models/__init__.py b/src/llama-stack-api/llama_stack_api/apis/models/__init__.py deleted file mode 100644 index ee90106b6..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/models/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .models import * diff --git a/src/llama-stack-api/llama_stack_api/apis/post_training/__init__.py b/src/llama-stack-api/llama_stack_api/apis/post_training/__init__.py deleted file mode 100644 index 695575a30..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/post_training/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .post_training import * diff --git a/src/llama-stack-api/llama_stack_api/apis/prompts/__init__.py b/src/llama-stack-api/llama_stack_api/apis/prompts/__init__.py deleted file mode 100644 index 6070f3450..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/prompts/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .prompts import ListPromptsResponse, Prompt, Prompts - -__all__ = ["Prompt", "Prompts", "ListPromptsResponse"] diff --git a/src/llama-stack-api/llama_stack_api/apis/providers/__init__.py b/src/llama-stack-api/llama_stack_api/apis/providers/__init__.py deleted file mode 100644 index e35e2fe47..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/providers/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .providers import * diff --git a/src/llama-stack-api/llama_stack_api/apis/safety/__init__.py b/src/llama-stack-api/llama_stack_api/apis/safety/__init__.py deleted file mode 100644 index d93bc1355..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/safety/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .safety import * diff --git a/src/llama-stack-api/llama_stack_api/apis/scoring/__init__.py b/src/llama-stack-api/llama_stack_api/apis/scoring/__init__.py deleted file mode 100644 index 624b9e704..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/scoring/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .scoring import * diff --git a/src/llama-stack-api/llama_stack_api/apis/scoring_functions/__init__.py b/src/llama-stack-api/llama_stack_api/apis/scoring_functions/__init__.py deleted file mode 100644 index fc1de0311..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/scoring_functions/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .scoring_functions import * diff --git a/src/llama-stack-api/llama_stack_api/apis/shields/__init__.py b/src/llama-stack-api/llama_stack_api/apis/shields/__init__.py deleted file mode 100644 index 783a4d124..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/shields/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .shields import * diff --git a/src/llama-stack-api/llama_stack_api/apis/tools/__init__.py b/src/llama-stack-api/llama_stack_api/apis/tools/__init__.py deleted file mode 100644 index b25310ecf..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/tools/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .rag_tool import * -from .tools import * diff --git a/src/llama-stack-api/llama_stack_api/apis/vector_io/__init__.py b/src/llama-stack-api/llama_stack_api/apis/vector_io/__init__.py deleted file mode 100644 index 3f4c60805..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/vector_io/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .vector_io import * diff --git a/src/llama-stack-api/llama_stack_api/apis/vector_stores/__init__.py b/src/llama-stack-api/llama_stack_api/apis/vector_stores/__init__.py deleted file mode 100644 index 8fc34058a..000000000 --- a/src/llama-stack-api/llama_stack_api/apis/vector_stores/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. 
-# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .vector_stores import * diff --git a/src/llama-stack-api/llama_stack_api/apis/batches/batches.py b/src/llama-stack-api/llama_stack_api/batches.py similarity index 98% rename from src/llama-stack-api/llama_stack_api/apis/batches/batches.py rename to src/llama-stack-api/llama_stack_api/batches.py index 83cc17422..00c47d39f 100644 --- a/src/llama-stack-api/llama_stack_api/apis/batches/batches.py +++ b/src/llama-stack-api/llama_stack_api/batches.py @@ -8,8 +8,8 @@ from typing import Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 try: from openai.types import Batch as BatchObject diff --git a/src/llama-stack-api/llama_stack_api/apis/benchmarks/benchmarks.py b/src/llama-stack-api/llama_stack_api/benchmarks.py similarity index 96% rename from src/llama-stack-api/llama_stack_api/apis/benchmarks/benchmarks.py rename to src/llama-stack-api/llama_stack_api/benchmarks.py index 9f6eb0dbc..e9ac3a8b8 100644 --- a/src/llama-stack-api/llama_stack_api/apis/benchmarks/benchmarks.py +++ b/src/llama-stack-api/llama_stack_api/benchmarks.py @@ -7,9 +7,9 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack_api.apis.resource import Resource, ResourceType -from llama_stack_api.apis.version import LLAMA_STACK_API_V1ALPHA +from llama_stack_api.resource import Resource, ResourceType from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA class CommonBenchmarkFields(BaseModel): diff --git a/src/llama-stack-api/llama_stack_api/apis/__init__.py b/src/llama-stack-api/llama_stack_api/common/__init__.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/apis/__init__.py rename to src/llama-stack-api/llama_stack_api/common/__init__.py diff --git a/src/llama-stack-api/llama_stack_api/apis/common/content_types.py b/src/llama-stack-api/llama_stack_api/common/content_types.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/apis/common/content_types.py rename to src/llama-stack-api/llama_stack_api/common/content_types.py diff --git a/src/llama-stack-api/llama_stack_api/apis/common/errors.py b/src/llama-stack-api/llama_stack_api/common/errors.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/apis/common/errors.py rename to src/llama-stack-api/llama_stack_api/common/errors.py diff --git a/src/llama-stack-api/llama_stack_api/apis/common/job_types.py b/src/llama-stack-api/llama_stack_api/common/job_types.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/apis/common/job_types.py rename to src/llama-stack-api/llama_stack_api/common/job_types.py diff --git a/src/llama-stack-api/llama_stack_api/apis/common/responses.py b/src/llama-stack-api/llama_stack_api/common/responses.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/apis/common/responses.py rename to src/llama-stack-api/llama_stack_api/common/responses.py diff --git a/src/llama-stack-api/llama_stack_api/apis/common/tracing.py b/src/llama-stack-api/llama_stack_api/common/tracing.py similarity index 100% rename from 
src/llama-stack-api/llama_stack_api/apis/common/tracing.py rename to src/llama-stack-api/llama_stack_api/common/tracing.py diff --git a/src/llama-stack-api/llama_stack_api/apis/common/training_types.py b/src/llama-stack-api/llama_stack_api/common/training_types.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/apis/common/training_types.py rename to src/llama-stack-api/llama_stack_api/common/training_types.py diff --git a/src/llama-stack-api/llama_stack_api/apis/common/type_system.py b/src/llama-stack-api/llama_stack_api/common/type_system.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/apis/common/type_system.py rename to src/llama-stack-api/llama_stack_api/common/type_system.py diff --git a/src/llama-stack-api/llama_stack_api/apis/conversations/conversations.py b/src/llama-stack-api/llama_stack_api/conversations.py similarity index 98% rename from src/llama-stack-api/llama_stack_api/apis/conversations/conversations.py rename to src/llama-stack-api/llama_stack_api/conversations.py index 1dbb7bbd3..4854181d1 100644 --- a/src/llama-stack-api/llama_stack_api/apis/conversations/conversations.py +++ b/src/llama-stack-api/llama_stack_api/conversations.py @@ -9,7 +9,8 @@ from typing import Annotated, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack_api.apis.agents.openai_responses import ( +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.openai_responses import ( OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseMCPApprovalRequest, OpenAIResponseMCPApprovalResponse, @@ -20,9 +21,8 @@ from llama_stack_api.apis.agents.openai_responses import ( OpenAIResponseOutputMessageMCPListTools, OpenAIResponseOutputMessageWebSearchToolCall, ) -from llama_stack_api.apis.common.tracing import telemetry_traceable -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 Metadata = dict[str, str] diff --git a/src/llama-stack-api/llama_stack_api/apis/datasetio/datasetio.py b/src/llama-stack-api/llama_stack_api/datasetio.py similarity index 91% rename from src/llama-stack-api/llama_stack_api/apis/datasetio/datasetio.py rename to src/llama-stack-api/llama_stack_api/datasetio.py index 7869d40bb..309a8ff41 100644 --- a/src/llama-stack-api/llama_stack_api/apis/datasetio/datasetio.py +++ b/src/llama-stack-api/llama_stack_api/datasetio.py @@ -6,10 +6,10 @@ from typing import Any, Protocol, runtime_checkable -from llama_stack_api.apis.common.responses import PaginatedResponse -from llama_stack_api.apis.datasets import Dataset -from llama_stack_api.apis.version import LLAMA_STACK_API_V1BETA +from llama_stack_api.common.responses import PaginatedResponse +from llama_stack_api.datasets import Dataset from llama_stack_api.schema_utils import webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1BETA class DatasetStore(Protocol): diff --git a/src/llama-stack-api/llama_stack_api/apis/datasets/datasets.py b/src/llama-stack-api/llama_stack_api/datasets.py similarity index 98% rename from src/llama-stack-api/llama_stack_api/apis/datasets/datasets.py rename to src/llama-stack-api/llama_stack_api/datasets.py index 8f4aaf7ff..76d787078 100644 --- a/src/llama-stack-api/llama_stack_api/apis/datasets/datasets.py +++ b/src/llama-stack-api/llama_stack_api/datasets.py @@ -9,9 +9,9 @@ from typing import Annotated, Any, Literal, Protocol from pydantic 
import BaseModel, Field -from llama_stack_api.apis.resource import Resource, ResourceType -from llama_stack_api.apis.version import LLAMA_STACK_API_V1BETA +from llama_stack_api.resource import Resource, ResourceType from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1BETA class DatasetPurpose(StrEnum): diff --git a/src/llama-stack-api/llama_stack_api/providers/datatypes.py b/src/llama-stack-api/llama_stack_api/datatypes.py similarity index 52% rename from src/llama-stack-api/llama_stack_api/providers/datatypes.py rename to src/llama-stack-api/llama_stack_api/datatypes.py index edc04e1bc..f024068f3 100644 --- a/src/llama-stack-api/llama_stack_api/providers/datatypes.py +++ b/src/llama-stack-api/llama_stack_api/datatypes.py @@ -4,21 +4,172 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import StrEnum +from enum import Enum, EnumMeta, StrEnum from typing import Any, Protocol from urllib.parse import urlparse from pydantic import BaseModel, Field -from llama_stack_api.apis.benchmarks import Benchmark -from llama_stack_api.apis.datasets import Dataset -from llama_stack_api.apis.datatypes import Api -from llama_stack_api.apis.models import Model -from llama_stack_api.apis.scoring_functions import ScoringFn -from llama_stack_api.apis.shields import Shield -from llama_stack_api.apis.tools import ToolGroup -from llama_stack_api.apis.vector_stores import VectorStore +from llama_stack_api.benchmarks import Benchmark +from llama_stack_api.datasets import Dataset +from llama_stack_api.models import Model from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api.scoring_functions import ScoringFn +from llama_stack_api.shields import Shield +from llama_stack_api.tools import ToolGroup +from llama_stack_api.vector_stores import VectorStore + + +class DynamicApiMeta(EnumMeta): + def __new__(cls, name, bases, namespace): + # Store the original enum values + original_values = {k: v for k, v in namespace.items() if not k.startswith("_")} + + # Create the enum class + cls = super().__new__(cls, name, bases, namespace) + + # Store the original values for reference + cls._original_values = original_values + # Initialize _dynamic_values + cls._dynamic_values = {} + + return cls + + def __call__(cls, value): + try: + return super().__call__(value) + except ValueError as e: + # If this value was already dynamically added, return it + if value in cls._dynamic_values: + return cls._dynamic_values[value] + + # If the value doesn't exist, create a new enum member + # Create a new member name from the value + member_name = value.lower().replace("-", "_") + + # If this member name already exists in the enum, return the existing member + if member_name in cls._member_map_: + return cls._member_map_[member_name] + + # Instead of creating a new member, raise ValueError to force users to use Api.add() to + # register new APIs explicitly + raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e + + def __iter__(cls): + # Allow iteration over both static and dynamic members + yield from super().__iter__() + if hasattr(cls, "_dynamic_values"): + yield from cls._dynamic_values.values() + + def add(cls, value): + """ + Add a new API to the enum. + Used to register external APIs. 
+ """ + member_name = value.lower().replace("-", "_") + + # If this member name already exists in the enum, return it + if member_name in cls._member_map_: + return cls._member_map_[member_name] + + # Create a new enum member + member = object.__new__(cls) + member._name_ = member_name + member._value_ = value + + # Add it to the enum class + cls._member_map_[member_name] = member + cls._member_names_.append(member_name) + cls._member_type_ = str + + # Store it in our dynamic values + cls._dynamic_values[value] = member + + return member + + +@json_schema_type +class Api(Enum, metaclass=DynamicApiMeta): + """Enumeration of all available APIs in the Llama Stack system. + :cvar providers: Provider management and configuration + :cvar inference: Text generation, chat completions, and embeddings + :cvar safety: Content moderation and safety shields + :cvar agents: Agent orchestration and execution + :cvar batches: Batch processing for asynchronous API requests + :cvar vector_io: Vector database operations and queries + :cvar datasetio: Dataset input/output operations + :cvar scoring: Model output evaluation and scoring + :cvar eval: Model evaluation and benchmarking framework + :cvar post_training: Fine-tuning and model training + :cvar tool_runtime: Tool execution and management + :cvar telemetry: Observability and system monitoring + :cvar models: Model metadata and management + :cvar shields: Safety shield implementations + :cvar datasets: Dataset creation and management + :cvar scoring_functions: Scoring function definitions + :cvar benchmarks: Benchmark suite management + :cvar tool_groups: Tool group organization + :cvar files: File storage and management + :cvar prompts: Prompt versions and management + :cvar inspect: Built-in system inspection and introspection + """ + + providers = "providers" + inference = "inference" + safety = "safety" + agents = "agents" + batches = "batches" + vector_io = "vector_io" + datasetio = "datasetio" + scoring = "scoring" + eval = "eval" + post_training = "post_training" + tool_runtime = "tool_runtime" + + models = "models" + shields = "shields" + vector_stores = "vector_stores" # only used for routing table + datasets = "datasets" + scoring_functions = "scoring_functions" + benchmarks = "benchmarks" + tool_groups = "tool_groups" + files = "files" + prompts = "prompts" + conversations = "conversations" + + # built-in API + inspect = "inspect" + + +@json_schema_type +class Error(BaseModel): + """ + Error response from the API. Roughly follows RFC 7807. 
+ + :param status: HTTP status code + :param title: Error title, a short summary of the error which is invariant for an error type + :param detail: Error detail, a longer human-readable description of the error + :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error + """ + + status: int + title: str + detail: str + instance: str | None = None + + +class ExternalApiSpec(BaseModel): + """Specification for an external API implementation.""" + + module: str = Field(..., description="Python module containing the API implementation") + name: str = Field(..., description="Name of the API") + pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API") + protocol: str = Field(..., description="Name of the protocol class for the API") + + +# Provider-related types (merged from providers/datatypes.py) +# NOTE: These imports are forward references to avoid circular dependencies +# They will be resolved at runtime when the classes are used class ModelsProtocolPrivate(Protocol): diff --git a/src/llama-stack-api/llama_stack_api/apis/eval/eval.py b/src/llama-stack-api/llama_stack_api/eval.py similarity index 93% rename from src/llama-stack-api/llama_stack_api/apis/eval/eval.py rename to src/llama-stack-api/llama_stack_api/eval.py index dbb611832..7a11c221e 100644 --- a/src/llama-stack-api/llama_stack_api/apis/eval/eval.py +++ b/src/llama-stack-api/llama_stack_api/eval.py @@ -8,12 +8,12 @@ from typing import Any, Literal, Protocol from pydantic import BaseModel, Field -from llama_stack_api.apis.common.job_types import Job -from llama_stack_api.apis.inference import SamplingParams, SystemMessage -from llama_stack_api.apis.scoring import ScoringResult -from llama_stack_api.apis.scoring_functions import ScoringFnParams -from llama_stack_api.apis.version import LLAMA_STACK_API_V1ALPHA +from llama_stack_api.common.job_types import Job +from llama_stack_api.inference import SamplingParams, SystemMessage from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.scoring import ScoringResult +from llama_stack_api.scoring_functions import ScoringFnParams +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA @json_schema_type diff --git a/src/llama-stack-api/llama_stack_api/apis/files/files.py b/src/llama-stack-api/llama_stack_api/files.py similarity index 97% rename from src/llama-stack-api/llama_stack_api/apis/files/files.py rename to src/llama-stack-api/llama_stack_api/files.py index 648120277..8a75a1c39 100644 --- a/src/llama-stack-api/llama_stack_api/apis/files/files.py +++ b/src/llama-stack-api/llama_stack_api/files.py @@ -10,10 +10,10 @@ from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable from fastapi import File, Form, Response, UploadFile from pydantic import BaseModel, Field -from llama_stack_api.apis.common.responses import Order -from llama_stack_api.apis.common.tracing import telemetry_traceable -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 +from llama_stack_api.common.responses import Order +from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 # OpenAI Files API Models diff --git a/src/llama-stack-api/llama_stack_api/apis/inference/inference.py b/src/llama-stack-api/llama_stack_api/inference.py similarity index 99% rename from 
src/llama-stack-api/llama_stack_api/apis/inference/inference.py rename to src/llama-stack-api/llama_stack_api/inference.py index b6e45a5c8..b42de95be 100644 --- a/src/llama-stack-api/llama_stack_api/apis/inference/inference.py +++ b/src/llama-stack-api/llama_stack_api/inference.py @@ -18,14 +18,14 @@ from fastapi import Body from pydantic import BaseModel, Field from typing_extensions import TypedDict -from llama_stack_api.apis.common.content_types import InterleavedContent -from llama_stack_api.apis.common.responses import ( +from llama_stack_api.common.content_types import InterleavedContent +from llama_stack_api.common.responses import ( Order, ) -from llama_stack_api.apis.common.tracing import telemetry_traceable -from llama_stack_api.apis.models import Model -from llama_stack_api.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.models import Model from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA @json_schema_type diff --git a/src/llama-stack-api/llama_stack_api/apis/inspect/inspect.py b/src/llama-stack-api/llama_stack_api/inspect.py similarity index 96% rename from src/llama-stack-api/llama_stack_api/apis/inspect/inspect.py rename to src/llama-stack-api/llama_stack_api/inspect.py index e271f4f7b..8326e9e6b 100644 --- a/src/llama-stack-api/llama_stack_api/apis/inspect/inspect.py +++ b/src/llama-stack-api/llama_stack_api/inspect.py @@ -8,11 +8,11 @@ from typing import Literal, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack_api.apis.version import ( +from llama_stack_api.datatypes import HealthStatus +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import ( LLAMA_STACK_API_V1, ) -from llama_stack_api.providers.datatypes import HealthStatus -from llama_stack_api.schema_utils import json_schema_type, webmethod # Valid values for the route filter parameter. 
# Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated) diff --git a/src/llama-stack-api/llama_stack_api/apis/models/models.py b/src/llama-stack-api/llama_stack_api/models.py similarity index 96% rename from src/llama-stack-api/llama_stack_api/apis/models/models.py rename to src/llama-stack-api/llama_stack_api/models.py index 59bc39622..833864ec2 100644 --- a/src/llama-stack-api/llama_stack_api/apis/models/models.py +++ b/src/llama-stack-api/llama_stack_api/models.py @@ -9,10 +9,10 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field, field_validator -from llama_stack_api.apis.common.tracing import telemetry_traceable -from llama_stack_api.apis.resource import Resource, ResourceType -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.resource import Resource, ResourceType from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 class CommonModelFields(BaseModel): diff --git a/src/llama-stack-api/llama_stack_api/apis/agents/openai_responses.py b/src/llama-stack-api/llama_stack_api/openai_responses.py similarity index 99% rename from src/llama-stack-api/llama_stack_api/apis/agents/openai_responses.py rename to src/llama-stack-api/llama_stack_api/openai_responses.py index 9b6f296f7..70139a98a 100644 --- a/src/llama-stack-api/llama_stack_api/apis/agents/openai_responses.py +++ b/src/llama-stack-api/llama_stack_api/openai_responses.py @@ -10,8 +10,8 @@ from typing import Annotated, Any, Literal from pydantic import BaseModel, Field, model_validator from typing_extensions import TypedDict -from llama_stack_api.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions from llama_stack_api.schema_utils import json_schema_type, register_schema +from llama_stack_api.vector_io import SearchRankingOptions as FileSearchRankingOptions # NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably # take their YAML and generate this file automatically. Their YAML is available. 
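The `DynamicApiMeta`/`Api` machinery merged into `datatypes.py` above keeps external API registration explicit: unknown values raise until registered through `Api.add()`. A short usage sketch based on the merged code (the `my-custom-api` name is illustrative only):

```python
from llama_stack_api.datatypes import Api

# Built-in APIs resolve as ordinary enum members.
assert Api("inference") is Api.inference

# Unknown values are rejected until registered via Api.add(), which
# creates the member (dashes become underscores in the member name).
try:
    Api("my-custom-api")
except ValueError:
    custom = Api.add("my-custom-api")  # registered as Api.my_custom_api

assert custom.value == "my-custom-api"
assert Api("my-custom-api") is custom  # dynamic members now resolve
```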
diff --git a/src/llama-stack-api/llama_stack_api/apis/post_training/post_training.py b/src/llama-stack-api/llama_stack_api/post_training.py similarity index 98% rename from src/llama-stack-api/llama_stack_api/apis/post_training/post_training.py rename to src/llama-stack-api/llama_stack_api/post_training.py index 5b2243b19..0cc9277d9 100644 --- a/src/llama-stack-api/llama_stack_api/apis/post_training/post_training.py +++ b/src/llama-stack-api/llama_stack_api/post_training.py @@ -10,11 +10,11 @@ from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field -from llama_stack_api.apis.common.content_types import URL -from llama_stack_api.apis.common.job_types import JobStatus -from llama_stack_api.apis.common.training_types import Checkpoint -from llama_stack_api.apis.version import LLAMA_STACK_API_V1ALPHA +from llama_stack_api.common.content_types import URL +from llama_stack_api.common.job_types import JobStatus +from llama_stack_api.common.training_types import Checkpoint from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA @json_schema_type diff --git a/src/llama-stack-api/llama_stack_api/apis/prompts/prompts.py b/src/llama-stack-api/llama_stack_api/prompts.py similarity index 98% rename from src/llama-stack-api/llama_stack_api/apis/prompts/prompts.py rename to src/llama-stack-api/llama_stack_api/prompts.py index 16e42b76a..651d03e61 100644 --- a/src/llama-stack-api/llama_stack_api/apis/prompts/prompts.py +++ b/src/llama-stack-api/llama_stack_api/prompts.py @@ -10,9 +10,9 @@ from typing import Protocol, runtime_checkable from pydantic import BaseModel, Field, field_validator, model_validator -from llama_stack_api.apis.common.tracing import telemetry_traceable -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 +from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama-stack-api/llama_stack_api/apis/providers/providers.py b/src/llama-stack-api/llama_stack_api/providers.py similarity index 94% rename from src/llama-stack-api/llama_stack_api/apis/providers/providers.py rename to src/llama-stack-api/llama_stack_api/providers.py index b5b9a875f..5b555b82f 100644 --- a/src/llama-stack-api/llama_stack_api/apis/providers/providers.py +++ b/src/llama-stack-api/llama_stack_api/providers.py @@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 -from llama_stack_api.providers.datatypes import HealthResponse +from llama_stack_api.datatypes import HealthResponse from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama-stack-api/llama_stack_api/providers/__init__.py b/src/llama-stack-api/llama_stack_api/providers/__init__.py deleted file mode 100644 index 07a084f31..000000000 --- a/src/llama-stack-api/llama_stack_api/providers/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .datatypes import * # noqa: F403 diff --git a/src/llama-stack-api/llama_stack_api/apis/tools/rag_tool.py b/src/llama-stack-api/llama_stack_api/rag_tool.py similarity index 98% rename from src/llama-stack-api/llama_stack_api/apis/tools/rag_tool.py rename to src/llama-stack-api/llama_stack_api/rag_tool.py index 8cd748f96..b5edd51af 100644 --- a/src/llama-stack-api/llama_stack_api/apis/tools/rag_tool.py +++ b/src/llama-stack-api/llama_stack_api/rag_tool.py @@ -9,7 +9,7 @@ from typing import Annotated, Any, Literal from pydantic import BaseModel, Field, field_validator -from llama_stack_api.apis.common.content_types import URL, InterleavedContent +from llama_stack_api.common.content_types import URL, InterleavedContent class RRFRanker(BaseModel): diff --git a/src/llama-stack-api/llama_stack_api/apis/resource.py b/src/llama-stack-api/llama_stack_api/resource.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/apis/resource.py rename to src/llama-stack-api/llama_stack_api/resource.py diff --git a/src/llama-stack-api/llama_stack_api/apis/safety/safety.py b/src/llama-stack-api/llama_stack_api/safety.py similarity index 94% rename from src/llama-stack-api/llama_stack_api/apis/safety/safety.py rename to src/llama-stack-api/llama_stack_api/safety.py index 2377df7be..ef84be2ea 100644 --- a/src/llama-stack-api/llama_stack_api/apis/safety/safety.py +++ b/src/llama-stack-api/llama_stack_api/safety.py @@ -9,11 +9,11 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack_api.apis.common.tracing import telemetry_traceable -from llama_stack_api.apis.inference import OpenAIMessageParam -from llama_stack_api.apis.shields import Shield -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.inference import OpenAIMessageParam from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.shields import Shield +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama-stack-api/llama_stack_api/apis/scoring/scoring.py b/src/llama-stack-api/llama_stack_api/scoring.py similarity index 95% rename from src/llama-stack-api/llama_stack_api/apis/scoring/scoring.py rename to src/llama-stack-api/llama_stack_api/scoring.py index 3206e9ec1..47d144d21 100644 --- a/src/llama-stack-api/llama_stack_api/apis/scoring/scoring.py +++ b/src/llama-stack-api/llama_stack_api/scoring.py @@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack_api.apis.scoring_functions import ScoringFn, ScoringFnParams -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams +from llama_stack_api.version import LLAMA_STACK_API_V1 # mapping of metric to value ScoringResultRow = dict[str, Any] diff --git a/src/llama-stack-api/llama_stack_api/apis/scoring_functions/scoring_functions.py b/src/llama-stack-api/llama_stack_api/scoring_functions.py similarity index 97% rename from src/llama-stack-api/llama_stack_api/apis/scoring_functions/scoring_functions.py rename to src/llama-stack-api/llama_stack_api/scoring_functions.py index 8e2c950b6..f75336e54 100644 --- a/src/llama-stack-api/llama_stack_api/apis/scoring_functions/scoring_functions.py +++ b/src/llama-stack-api/llama_stack_api/scoring_functions.py @@ -16,10 +16,10 
@@ from typing import ( from pydantic import BaseModel, Field -from llama_stack_api.apis.common.type_system import ParamType -from llama_stack_api.apis.resource import Resource, ResourceType -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 +from llama_stack_api.common.type_system import ParamType +from llama_stack_api.resource import Resource, ResourceType from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 # Perhaps more structure can be imposed on these functions. Maybe they could be associated diff --git a/src/llama-stack-api/llama_stack_api/apis/shields/shields.py b/src/llama-stack-api/llama_stack_api/shields.py similarity index 93% rename from src/llama-stack-api/llama_stack_api/apis/shields/shields.py rename to src/llama-stack-api/llama_stack_api/shields.py index 5382ef892..2aeb83333 100644 --- a/src/llama-stack-api/llama_stack_api/apis/shields/shields.py +++ b/src/llama-stack-api/llama_stack_api/shields.py @@ -8,10 +8,10 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack_api.apis.common.tracing import telemetry_traceable -from llama_stack_api.apis.resource import Resource, ResourceType -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.resource import Resource, ResourceType from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 class CommonShieldFields(BaseModel): diff --git a/src/llama-stack-api/llama_stack_api/apis/tools/tools.py b/src/llama-stack-api/llama_stack_api/tools.py similarity index 96% rename from src/llama-stack-api/llama_stack_api/apis/tools/tools.py rename to src/llama-stack-api/llama_stack_api/tools.py index 4c7ed1104..6571c2047 100644 --- a/src/llama-stack-api/llama_stack_api/apis/tools/tools.py +++ b/src/llama-stack-api/llama_stack_api/tools.py @@ -10,11 +10,11 @@ from typing import Any, Literal, Protocol from pydantic import BaseModel from typing_extensions import runtime_checkable -from llama_stack_api.apis.common.content_types import URL, InterleavedContent -from llama_stack_api.apis.common.tracing import telemetry_traceable -from llama_stack_api.apis.resource import Resource, ResourceType -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 +from llama_stack_api.common.content_types import URL, InterleavedContent +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.resource import Resource, ResourceType from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama-stack-api/llama_stack_api/apis/vector_io/vector_io.py b/src/llama-stack-api/llama_stack_api/vector_io.py similarity index 99% rename from src/llama-stack-api/llama_stack_api/apis/vector_io/vector_io.py rename to src/llama-stack-api/llama_stack_api/vector_io.py index e49c689e8..053e569f4 100644 --- a/src/llama-stack-api/llama_stack_api/apis/vector_io/vector_io.py +++ b/src/llama-stack-api/llama_stack_api/vector_io.py @@ -13,12 +13,12 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable from fastapi import Body, Query from pydantic import BaseModel, Field -from llama_stack_api.apis.common.tracing import telemetry_traceable -from llama_stack_api.apis.inference import InterleavedContent -from 
llama_stack_api.apis.vector_stores import VectorStore -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.inference import InterleavedContent from llama_stack_api.schema_utils import json_schema_type, webmethod from llama_stack_api.strong_typing.schema import register_schema +from llama_stack_api.vector_stores import VectorStore +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama-stack-api/llama_stack_api/apis/vector_stores/vector_stores.py b/src/llama-stack-api/llama_stack_api/vector_stores.py similarity index 96% rename from src/llama-stack-api/llama_stack_api/apis/vector_stores/vector_stores.py rename to src/llama-stack-api/llama_stack_api/vector_stores.py index 66c2d23a2..0a1e6c53c 100644 --- a/src/llama-stack-api/llama_stack_api/apis/vector_stores/vector_stores.py +++ b/src/llama-stack-api/llama_stack_api/vector_stores.py @@ -8,7 +8,7 @@ from typing import Literal from pydantic import BaseModel -from llama_stack_api.apis.resource import Resource, ResourceType +from llama_stack_api.resource import Resource, ResourceType # Internal resource type for storing the vector store routing and other information diff --git a/src/llama-stack-api/llama_stack_api/apis/version.py b/src/llama-stack-api/llama_stack_api/version.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/apis/version.py rename to src/llama-stack-api/llama_stack_api/version.py diff --git a/src/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py index 70ef5f3ab..1ad89f79a 100644 --- a/src/llama_stack/cli/stack/_list_deps.py +++ b/src/llama_stack/cli/stack/_list_deps.py @@ -9,7 +9,7 @@ import sys from pathlib import Path import yaml -from llama_stack_api.providers.datatypes import Api +from llama_stack_api.datatypes import Api from termcolor import cprint from llama_stack.cli.stack.utils import ImageType diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py index 98efd5687..ad3f2aa04 100644 --- a/src/llama_stack/cli/stack/utils.py +++ b/src/llama_stack/cli/stack/utils.py @@ -11,7 +11,7 @@ from functools import lru_cache from pathlib import Path import yaml -from llama_stack_api.providers.datatypes import Api +from llama_stack_api.datatypes import Api from termcolor import cprint from llama_stack.core.datatypes import ( diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index 4f5f968a7..473de5b0d 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -6,7 +6,7 @@ import sys -from llama_stack_api.providers.datatypes import Api +from llama_stack_api.datatypes import Api from pydantic import BaseModel from termcolor import cprint diff --git a/src/llama_stack/core/client.py b/src/llama_stack/core/client.py index 3b0749777..c3f707b4f 100644 --- a/src/llama_stack/core/client.py +++ b/src/llama_stack/core/client.py @@ -12,7 +12,7 @@ from enum import Enum from typing import Any, Union, get_args, get_origin import httpx -from llama_stack_api.providers.datatypes import RemoteProviderConfig +from llama_stack_api.datatypes import RemoteProviderConfig from pydantic import BaseModel, parse_obj_as from termcolor import cprint diff --git a/src/llama_stack/core/configure.py b/src/llama_stack/core/configure.py index 191e18eef..9da2230b0 100644 --- a/src/llama_stack/core/configure.py +++ b/src/llama_stack/core/configure.py @@ -6,7 +6,7 @@ import textwrap from typing import Any -from 
-from llama_stack_api.providers.datatypes import Api, ProviderSpec
+from llama_stack_api.datatypes import Api, ProviderSpec
 from llama_stack.core.datatypes import (
     LLAMA_STACK_RUN_CONFIG_VERSION,
diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py
index 1e42cb5dc..5946234de 100644
--- a/src/llama_stack/core/conversations/conversations.py
+++ b/src/llama_stack/core/conversations/conversations.py
@@ -8,7 +8,7 @@ import secrets
 import time
 from typing import Any, Literal
-from llama_stack_api.apis.conversations.conversations import (
+from llama_stack_api.conversations import (
     Conversation,
     ConversationDeletedResource,
     ConversationItem,
diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py
index a6f94e640..13b5689f1 100644
--- a/src/llama_stack/core/datatypes.py
+++ b/src/llama_stack/core/datatypes.py
@@ -9,21 +9,21 @@ from pathlib import Path
 from typing import Annotated, Any, Literal, Self
 from urllib.parse import urlparse
-from llama_stack_api.apis.benchmarks import Benchmark, BenchmarkInput
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Dataset, DatasetInput
-from llama_stack_api.apis.eval import Eval
-from llama_stack_api.apis.inference import Inference
-from llama_stack_api.apis.models import Model, ModelInput
-from llama_stack_api.apis.resource import Resource
-from llama_stack_api.apis.safety import Safety
-from llama_stack_api.apis.scoring import Scoring
-from llama_stack_api.apis.scoring_functions import ScoringFn, ScoringFnInput
-from llama_stack_api.apis.shields import Shield, ShieldInput
-from llama_stack_api.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
-from llama_stack_api.apis.vector_io import VectorIO
-from llama_stack_api.apis.vector_stores import VectorStore, VectorStoreInput
-from llama_stack_api.providers.datatypes import Api, ProviderSpec
+from llama_stack_api.benchmarks import Benchmark, BenchmarkInput
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Dataset, DatasetInput
+from llama_stack_api.datatypes import Api, ProviderSpec
+from llama_stack_api.eval import Eval
+from llama_stack_api.inference import Inference
+from llama_stack_api.models import Model, ModelInput
+from llama_stack_api.resource import Resource
+from llama_stack_api.safety import Safety
+from llama_stack_api.scoring import Scoring
+from llama_stack_api.scoring_functions import ScoringFn, ScoringFnInput
+from llama_stack_api.shields import Shield, ShieldInput
+from llama_stack_api.tools import ToolGroup, ToolGroupInput, ToolRuntime
+from llama_stack_api.vector_io import VectorIO
+from llama_stack_api.vector_stores import VectorStore, VectorStoreInput
 from pydantic import BaseModel, Field, field_validator, model_validator
 from llama_stack.core.access_control.datatypes import AccessRule
diff --git a/src/llama_stack/core/distribution.py b/src/llama_stack/core/distribution.py
index 0f33db0ca..82cb05851 100644
--- a/src/llama_stack/core/distribution.py
+++ b/src/llama_stack/core/distribution.py
@@ -10,7 +10,7 @@ import os
 from typing import Any
 import yaml
-from llama_stack_api.providers.datatypes import (
+from llama_stack_api.datatypes import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
diff --git a/src/llama_stack/core/external.py b/src/llama_stack/core/external.py
index fa04ad2e6..42ddf1f82 100644
--- a/src/llama_stack/core/external.py
+++ b/src/llama_stack/core/external.py
@@ -6,7 +6,7 @@ import yaml
-from llama_stack_api.apis.datatypes import Api, ExternalApiSpec
+from llama_stack_api.datatypes import Api, ExternalApiSpec
 from llama_stack.core.datatypes import BuildConfig, StackRunConfig
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py
index f9573fbfc..7ffba9101 100644
--- a/src/llama_stack/core/inspect.py
+++ b/src/llama_stack/core/inspect.py
@@ -6,14 +6,14 @@ from importlib.metadata import version
-from llama_stack_api.apis.inspect import (
+from llama_stack_api.datatypes import HealthStatus
+from llama_stack_api.inspect import (
     HealthInfo,
     Inspect,
     ListRoutesResponse,
     RouteInfo,
     VersionInfo,
 )
-from llama_stack_api.providers.datatypes import HealthStatus
 from pydantic import BaseModel
 from llama_stack.core.datatypes import StackRunConfig
diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py
index 68fb453c6..e5e1904dd 100644
--- a/src/llama_stack/core/prompts/prompts.py
+++ b/src/llama_stack/core/prompts/prompts.py
@@ -7,7 +7,7 @@ import json
 from typing import Any
-from llama_stack_api.apis.prompts import ListPromptsResponse, Prompt, Prompts
+from llama_stack_api.prompts import ListPromptsResponse, Prompt, Prompts
 from pydantic import BaseModel
 from llama_stack.core.datatypes import StackRunConfig
diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py
index 6b0c70818..bf918cd4f 100644
--- a/src/llama_stack/core/providers.py
+++ b/src/llama_stack/core/providers.py
@@ -7,8 +7,8 @@ import asyncio
 from typing import Any
-from llama_stack_api.apis.providers import ListProvidersResponse, ProviderInfo, Providers
-from llama_stack_api.providers.datatypes import HealthResponse, HealthStatus
+from llama_stack_api.datatypes import HealthResponse, HealthStatus
+from llama_stack_api.providers import ListProvidersResponse, ProviderInfo, Providers
 from pydantic import BaseModel
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py
index a91634640..2e15aaed2 100644
--- a/src/llama_stack/core/resolver.py
+++ b/src/llama_stack/core/resolver.py
@@ -8,33 +8,17 @@ import importlib.metadata
 import inspect
 from typing import Any
-from llama_stack_api.apis.agents import Agents
-from llama_stack_api.apis.batches import Batches
-from llama_stack_api.apis.benchmarks import Benchmarks
-from llama_stack_api.apis.conversations import Conversations
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
-from llama_stack_api.apis.datatypes import ExternalApiSpec
-from llama_stack_api.apis.eval import Eval
-from llama_stack_api.apis.files import Files
-from llama_stack_api.apis.inference import Inference, InferenceProvider
-from llama_stack_api.apis.inspect import Inspect
-from llama_stack_api.apis.models import Models
-from llama_stack_api.apis.post_training import PostTraining
-from llama_stack_api.apis.prompts import Prompts
-from llama_stack_api.apis.providers import Providers as ProvidersAPI
-from llama_stack_api.apis.safety import Safety
-from llama_stack_api.apis.scoring import Scoring
-from llama_stack_api.apis.scoring_functions import ScoringFunctions
-from llama_stack_api.apis.shields import Shields
-from llama_stack_api.apis.tools import ToolGroups, ToolRuntime
-from llama_stack_api.apis.vector_io import VectorIO
-from llama_stack_api.apis.vector_stores import VectorStore
-from llama_stack_api.apis.version import LLAMA_STACK_API_V1ALPHA
-from llama_stack_api.providers.datatypes import (
+from llama_stack_api.agents import Agents
+from llama_stack_api.batches import Batches
+from llama_stack_api.benchmarks import Benchmarks
+from llama_stack_api.conversations import Conversations
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Datasets
+from llama_stack_api.datatypes import (
     Api,
     BenchmarksProtocolPrivate,
     DatasetsProtocolPrivate,
+    ExternalApiSpec,
     ModelsProtocolPrivate,
     ProviderSpec,
     RemoteProviderConfig,
@@ -43,6 +27,22 @@ from llama_stack_api.providers.datatypes import (
     ShieldsProtocolPrivate,
     ToolGroupsProtocolPrivate,
 )
+from llama_stack_api.eval import Eval
+from llama_stack_api.files import Files
+from llama_stack_api.inference import Inference, InferenceProvider
+from llama_stack_api.inspect import Inspect
+from llama_stack_api.models import Models
+from llama_stack_api.post_training import PostTraining
+from llama_stack_api.prompts import Prompts
+from llama_stack_api.providers import Providers as ProvidersAPI
+from llama_stack_api.safety import Safety
+from llama_stack_api.scoring import Scoring
+from llama_stack_api.scoring_functions import ScoringFunctions
+from llama_stack_api.shields import Shields
+from llama_stack_api.tools import ToolGroups, ToolRuntime
+from llama_stack_api.vector_io import VectorIO
+from llama_stack_api.vector_stores import VectorStore
+from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
 from llama_stack.core.client import get_client_impl
 from llama_stack.core.datatypes import (
diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py
index 571331b62..dae0ba7b1 100644
--- a/src/llama_stack/core/routers/__init__.py
+++ b/src/llama_stack/core/routers/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
-from llama_stack_api.providers.datatypes import Api, RoutingTable
+from llama_stack_api.datatypes import Api, RoutingTable
 from llama_stack.core.datatypes import (
     AccessRule,
diff --git a/src/llama_stack/core/routers/datasets.py b/src/llama_stack/core/routers/datasets.py
index 05e7e3205..66bdefddf 100644
--- a/src/llama_stack/core/routers/datasets.py
+++ b/src/llama_stack/core/routers/datasets.py
@@ -6,10 +6,10 @@ from typing import Any
-from llama_stack_api.apis.common.responses import PaginatedResponse
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import DatasetPurpose, DataSource
-from llama_stack_api.providers.datatypes import RoutingTable
+from llama_stack_api.common.responses import PaginatedResponse
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import DatasetPurpose, DataSource
+from llama_stack_api.datatypes import RoutingTable
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/core/routers/eval_scoring.py b/src/llama_stack/core/routers/eval_scoring.py
index 2417a1a55..0e705f556 100644
--- a/src/llama_stack/core/routers/eval_scoring.py
+++ b/src/llama_stack/core/routers/eval_scoring.py
@@ -6,14 +6,14 @@ from typing import Any
-from llama_stack_api.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job
-from llama_stack_api.apis.scoring import (
+from llama_stack_api.datatypes import RoutingTable
+from llama_stack_api.eval import BenchmarkConfig, Eval, EvaluateResponse, Job
+from llama_stack_api.scoring import (
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
     ScoringFnParams,
 )
-from llama_stack_api.providers.datatypes import RoutingTable
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py
index 08040161d..498ab29b1 100644
--- a/src/llama_stack/core/routers/inference.py
+++ b/src/llama_stack/core/routers/inference.py
@@ -11,13 +11,16 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 from fastapi import Body
-from llama_stack_api.apis.common.errors import ModelNotFoundError, ModelTypeError
-from llama_stack_api.apis.inference import (
+from llama_stack_api.common.errors import ModelNotFoundError, ModelTypeError
+from llama_stack_api.datatypes import HealthResponse, HealthStatus, RoutingTable
+from llama_stack_api.inference import (
     Inference,
     ListOpenAIChatCompletionResponse,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAIChatCompletionToolCall,
     OpenAIChatCompletionToolCallFunction,
@@ -32,12 +35,7 @@ from llama_stack_api.apis.inference import (
     Order,
     RerankResponse,
 )
-from llama_stack_api.apis.inference.inference import (
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-)
-from llama_stack_api.apis.models import ModelType
-from llama_stack_api.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
+from llama_stack_api.models import ModelType
 from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
 from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
 from pydantic import TypeAdapter
diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py
index aad36b340..f31229e71 100644
--- a/src/llama_stack/core/routers/safety.py
+++ b/src/llama_stack/core/routers/safety.py
@@ -6,11 +6,10 @@ from typing import Any
-from llama_stack_api.apis.inference import OpenAIMessageParam
-from llama_stack_api.apis.safety import RunShieldResponse, Safety
-from llama_stack_api.apis.safety.safety import ModerationObject
-from llama_stack_api.apis.shields import Shield
-from llama_stack_api.providers.datatypes import RoutingTable
+from llama_stack_api.datatypes import RoutingTable
+from llama_stack_api.inference import OpenAIMessageParam
+from llama_stack_api.safety import ModerationObject, RunShieldResponse, Safety
+from llama_stack_api.shields import Shield
 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py
index 0da39801e..e5db3f445 100644
--- a/src/llama_stack/core/routers/tool_runtime.py
+++ b/src/llama_stack/core/routers/tool_runtime.py
@@ -6,10 +6,10 @@ from typing import Any
-from llama_stack_api.apis.common.content_types import (
+from llama_stack_api.common.content_types import (
     URL,
 )
-from llama_stack_api.apis.tools import (
+from llama_stack_api.tools import (
     ListToolDefsResponse,
     ToolRuntime,
 )
diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py
index d41129e85..6880e6322 100644
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@@ -9,9 +9,10 @@ import uuid
 from typing import Annotated, Any
 from fastapi import Body
-from llama_stack_api.apis.common.content_types import InterleavedContent
-from llama_stack_api.apis.models import ModelType
-from llama_stack_api.apis.vector_io import (
+from llama_stack_api.common.content_types import InterleavedContent
+from llama_stack_api.datatypes import HealthResponse, HealthStatus, RoutingTable
+from llama_stack_api.models import ModelType
+from llama_stack_api.vector_io import (
     Chunk,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     OpenAICreateVectorStoreRequestWithExtraBody,
@@ -32,7 +33,6 @@ from llama_stack_api.apis.vector_io import (
     VectorStoreObject,
     VectorStoreSearchResponsePage,
 )
-from llama_stack_api.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
 from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py
index a86c86b2a..76b5c0167 100644
--- a/src/llama_stack/core/routing_tables/benchmarks.py
+++ b/src/llama_stack/core/routing_tables/benchmarks.py
@@ -6,7 +6,7 @@ from typing import Any
-from llama_stack_api.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
+from llama_stack_api.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
 from llama_stack.core.datatypes import (
     BenchmarkWithOwner,
diff --git a/src/llama_stack/core/routing_tables/common.py b/src/llama_stack/core/routing_tables/common.py
index 7c08d494c..718fc5544 100644
--- a/src/llama_stack/core/routing_tables/common.py
+++ b/src/llama_stack/core/routing_tables/common.py
@@ -6,10 +6,10 @@ from typing import Any
-from llama_stack_api.apis.common.errors import ModelNotFoundError
-from llama_stack_api.apis.models import Model
-from llama_stack_api.apis.resource import ResourceType
-from llama_stack_api.providers.datatypes import Api, RoutingTable
+from llama_stack_api.common.errors import ModelNotFoundError
+from llama_stack_api.datatypes import Api, RoutingTable
+from llama_stack_api.models import Model
+from llama_stack_api.resource import ResourceType
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
 from llama_stack.core.access_control.datatypes import Action
diff --git a/src/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py
index 4a5f182c2..a17c23d2b 100644
--- a/src/llama_stack/core/routing_tables/datasets.py
+++ b/src/llama_stack/core/routing_tables/datasets.py
@@ -7,8 +7,8 @@ import uuid
 from typing import Any
-from llama_stack_api.apis.common.errors import DatasetNotFoundError
-from llama_stack_api.apis.datasets import (
+from llama_stack_api.common.errors import DatasetNotFoundError
+from llama_stack_api.datasets import (
     Dataset,
     DatasetPurpose,
     Datasets,
@@ -18,7 +18,7 @@ from llama_stack_api.apis.datasets import (
     RowsDataSource,
     URIDataSource,
 )
-from llama_stack_api.apis.resource import ResourceType
+from llama_stack_api.resource import ResourceType
 from llama_stack.core.datatypes import (
     DatasetWithOwner,
diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py
index 31ac5f239..d323a835d 100644
--- a/src/llama_stack/core/routing_tables/models.py
+++ b/src/llama_stack/core/routing_tables/models.py
@@ -7,8 +7,8 @@ import time
 from typing import Any
-from llama_stack_api.apis.common.errors import ModelNotFoundError
-from llama_stack_api.apis.models import (
+from llama_stack_api.common.errors import ModelNotFoundError
+from llama_stack_api.models import (
     ListModelsResponse,
     Model,
     Models,
diff --git a/src/llama_stack/core/routing_tables/scoring_functions.py b/src/llama_stack/core/routing_tables/scoring_functions.py
index 597393e84..fcc0a8007 100644
--- a/src/llama_stack/core/routing_tables/scoring_functions.py
+++ b/src/llama_stack/core/routing_tables/scoring_functions.py
@@ -4,9 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import ParamType
-from llama_stack_api.apis.resource import ResourceType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import ParamType
+from llama_stack_api.resource import ResourceType
+from llama_stack_api.scoring_functions import (
     ListScoringFunctionsResponse,
     ScoringFn,
     ScoringFnParams,
diff --git a/src/llama_stack/core/routing_tables/shields.py b/src/llama_stack/core/routing_tables/shields.py
index cb198b843..326ebca0d 100644
--- a/src/llama_stack/core/routing_tables/shields.py
+++ b/src/llama_stack/core/routing_tables/shields.py
@@ -6,8 +6,8 @@ from typing import Any
-from llama_stack_api.apis.resource import ResourceType
-from llama_stack_api.apis.shields import ListShieldsResponse, Shield, Shields
+from llama_stack_api.resource import ResourceType
+from llama_stack_api.shields import ListShieldsResponse, Shield, Shields
 from llama_stack.core.datatypes import (
     ShieldWithOwner,
diff --git a/src/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py
index 91481d325..c49f36f6e 100644
--- a/src/llama_stack/core/routing_tables/toolgroups.py
+++ b/src/llama_stack/core/routing_tables/toolgroups.py
@@ -6,9 +6,9 @@ from typing import Any
-from llama_stack_api.apis.common.content_types import URL
-from llama_stack_api.apis.common.errors import ToolGroupNotFoundError
-from llama_stack_api.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
+from llama_stack_api.common.content_types import URL
+from llama_stack_api.common.errors import ToolGroupNotFoundError
+from llama_stack_api.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
 from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py
index ebf0be421..d9d3ce518 100644
--- a/src/llama_stack/core/routing_tables/vector_stores.py
+++ b/src/llama_stack/core/routing_tables/vector_stores.py
@@ -6,12 +6,12 @@ from typing import Any
-from llama_stack_api.apis.common.errors import ModelNotFoundError, ModelTypeError
-from llama_stack_api.apis.models import ModelType
-from llama_stack_api.apis.resource import ResourceType
+from llama_stack_api.common.errors import ModelNotFoundError, ModelTypeError
+from llama_stack_api.models import ModelType
+from llama_stack_api.resource import ResourceType
 # Removed VectorStores import to avoid exposing public API
-from llama_stack_api.apis.vector_io.vector_io import (
+from llama_stack_api.vector_io import (
     SearchRankingOptions,
     VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
diff --git a/src/llama_stack/core/server/auth_providers.py b/src/llama_stack/core/server/auth_providers.py
index bd7f3e188..a0a4c4b12 100644
--- a/src/llama_stack/core/server/auth_providers.py
+++ b/src/llama_stack/core/server/auth_providers.py
@@ -11,7 +11,7 @@ from urllib.parse import parse_qs, urljoin, urlparse
 import httpx
 import jwt
-from llama_stack_api.apis.common.errors import TokenValidationError
+from llama_stack_api.common.errors import TokenValidationError
 from pydantic import BaseModel, Field
 from llama_stack.core.datatypes import (
diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py
index 99270e5df..34319dcfa 100644
--- a/src/llama_stack/core/server/routes.py
+++ b/src/llama_stack/core/server/routes.py
@@ -10,7 +10,7 @@ from collections.abc import Callable
 from typing import Any
 from aiohttp import hdrs
-from llama_stack_api.apis.datatypes import Api, ExternalApiSpec
+from llama_stack_api.datatypes import Api, ExternalApiSpec
 from llama_stack_api.schema_utils import WebMethod
 from starlette.routing import Route
diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py
index 7f8de18da..6a6c7b2c9 100644
--- a/src/llama_stack/core/server/server.py
+++ b/src/llama_stack/core/server/server.py
@@ -28,9 +28,9 @@ from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
-from llama_stack_api.apis.common.errors import ConflictError, ResourceNotFoundError
-from llama_stack_api.apis.common.responses import PaginatedResponse
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.common.errors import ConflictError, ResourceNotFoundError
+from llama_stack_api.common.responses import PaginatedResponse
+from llama_stack_api.datatypes import Api
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py
index a21f4cd1d..983249884 100644
--- a/src/llama_stack/core/stack.py
+++ b/src/llama_stack/core/stack.py
@@ -12,27 +12,27 @@ import tempfile
 from typing import Any
 import yaml
-from llama_stack_api.apis.agents import Agents
-from llama_stack_api.apis.batches import Batches
-from llama_stack_api.apis.benchmarks import Benchmarks
-from llama_stack_api.apis.conversations import Conversations
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
-from llama_stack_api.apis.eval import Eval
-from llama_stack_api.apis.files import Files
-from llama_stack_api.apis.inference import Inference
-from llama_stack_api.apis.inspect import Inspect
-from llama_stack_api.apis.models import Models
-from llama_stack_api.apis.post_training import PostTraining
-from llama_stack_api.apis.prompts import Prompts
-from llama_stack_api.apis.providers import Providers
-from llama_stack_api.apis.safety import Safety
-from llama_stack_api.apis.scoring import Scoring
-from llama_stack_api.apis.scoring_functions import ScoringFunctions
-from llama_stack_api.apis.shields import Shields
-from llama_stack_api.apis.tools import ToolGroups, ToolRuntime
-from llama_stack_api.apis.vector_io import VectorIO
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.agents import Agents
+from llama_stack_api.batches import Batches
+from llama_stack_api.benchmarks import Benchmarks
+from llama_stack_api.conversations import Conversations
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Datasets
+from llama_stack_api.datatypes import Api
+from llama_stack_api.eval import Eval
+from llama_stack_api.files import Files
+from llama_stack_api.inference import Inference
+from llama_stack_api.inspect import Inspect
+from llama_stack_api.models import Models
+from llama_stack_api.post_training import PostTraining
+from llama_stack_api.prompts import Prompts
+from llama_stack_api.providers import Providers
+from llama_stack_api.safety import Safety
+from llama_stack_api.scoring import Scoring
+from llama_stack_api.scoring_functions import ScoringFunctions
+from llama_stack_api.shields import Shields
+from llama_stack_api.tools import ToolGroups, ToolRuntime
+from llama_stack_api.vector_io import VectorIO
 from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
 from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
diff --git a/src/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py
index 62a2f552f..f5340b2e2 100644
--- a/src/llama_stack/distributions/dell/dell.py
+++ b/src/llama_stack/distributions/dell/dell.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.models import ModelType
 from llama_stack.core.datatypes import (
     BuildProvider,
diff --git a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
index b69a36485..53ee71a7e 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
+++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
@@ -6,7 +6,7 @@ from pathlib import Path
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.models import ModelType
 from llama_stack.core.datatypes import (
     BuildProvider,
diff --git a/src/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
index bba0b5d88..52c9c73ed 100644
--- a/src/llama_stack/distributions/open-benchmark/open_benchmark.py
+++ b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
@@ -5,8 +5,8 @@
 # the root directory of this source tree.
-from llama_stack_api.apis.datasets import DatasetPurpose, URIDataSource
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.datasets import DatasetPurpose, URIDataSource
+from llama_stack_api.models import ModelType
 from llama_stack.core.datatypes import (
     BenchmarkInput,
diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py
index 3d2f4b1fd..6bd4f04be 100644
--- a/src/llama_stack/distributions/starter/starter.py
+++ b/src/llama_stack/distributions/starter/starter.py
@@ -7,7 +7,7 @@ from typing import Any
-from llama_stack_api.providers.datatypes import RemoteProviderSpec
+from llama_stack_api.datatypes import RemoteProviderSpec
 from llama_stack.core.datatypes import (
     BuildProvider,
diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py
index 0ae188d0a..3ef116821 100644
--- a/src/llama_stack/distributions/template.py
+++ b/src/llama_stack/distributions/template.py
@@ -10,8 +10,8 @@ from typing import Any, Literal
 import jinja2
 import rich
 import yaml
-from llama_stack_api.apis.datasets import DatasetPurpose
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.datasets import DatasetPurpose
+from llama_stack_api.models import ModelType
 from pydantic import BaseModel, Field
 from llama_stack.core.datatypes import (
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
index ca2975eee..4917ccca5 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
-from llama_stack_api.apis.agents import (
+from llama_stack_api.agents import (
     Agents,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
@@ -14,16 +14,16 @@ from llama_stack_api.apis.agents import (
     OpenAIResponseInputTool,
     OpenAIResponseObject,
     Order,
+    ResponseGuardrail,
 )
-from llama_stack_api.apis.agents.agents import ResponseGuardrail
-from llama_stack_api.apis.agents.openai_responses import OpenAIResponsePrompt, OpenAIResponseText
-from llama_stack_api.apis.conversations import Conversations
-from llama_stack_api.apis.inference import (
+from llama_stack_api.conversations import Conversations
+from llama_stack_api.inference import (
     Inference,
 )
-from llama_stack_api.apis.safety import Safety
-from llama_stack_api.apis.tools import ToolGroups, ToolRuntime
-from llama_stack_api.apis.vector_io import VectorIO
+from llama_stack_api.openai_responses import OpenAIResponsePrompt, OpenAIResponseText
+from llama_stack_api.safety import Safety
+from llama_stack_api.tools import ToolGroups, ToolRuntime
+from llama_stack_api.vector_io import VectorIO
 from llama_stack.core.datatypes import AccessRule
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index 63c872d2a..b2d604247 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -8,9 +8,17 @@ import time
 import uuid
 from collections.abc import AsyncIterator
-from llama_stack_api.apis.agents import Order
-from llama_stack_api.apis.agents.agents import ResponseGuardrailSpec
-from llama_stack_api.apis.agents.openai_responses import (
+from llama_stack_api.agents import Order, ResponseGuardrailSpec
+from llama_stack_api.common.errors import (
+    InvalidConversationIdError,
+)
+from llama_stack_api.conversations import ConversationItem, Conversations
+from llama_stack_api.inference import (
+    Inference,
+    OpenAIMessageParam,
+    OpenAISystemMessageParam,
+)
+from llama_stack_api.openai_responses import (
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
     OpenAIDeleteResponseObject,
@@ -24,19 +32,9 @@ from llama_stack_api.apis.agents.openai_responses import (
     OpenAIResponseText,
     OpenAIResponseTextFormat,
 )
-from llama_stack_api.apis.common.errors import (
-    InvalidConversationIdError,
-)
-from llama_stack_api.apis.conversations import Conversations
-from llama_stack_api.apis.conversations.conversations import ConversationItem
-from llama_stack_api.apis.inference import (
-    Inference,
-    OpenAIMessageParam,
-    OpenAISystemMessageParam,
-)
-from llama_stack_api.apis.safety import Safety
-from llama_stack_api.apis.tools import ToolGroups, ToolRuntime
-from llama_stack_api.apis.vector_io import VectorIO
+from llama_stack_api.safety import Safety
+from llama_stack_api.tools import ToolGroups, ToolRuntime
+from llama_stack_api.vector_io import VectorIO
 from pydantic import BaseModel, TypeAdapter
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 7329df14f..2f36f14a8 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -8,7 +8,17 @@ import uuid
 from collections.abc import AsyncIterator
 from typing import Any
-from llama_stack_api.apis.agents.openai_responses import (
+from llama_stack_api.inference import (
+    Inference,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIChatCompletionToolCall,
+    OpenAIChoice,
+    OpenAIMessageParam,
+)
+from llama_stack_api.openai_responses import (
     AllowedToolsFilter,
     ApprovalFilter,
     MCPListToolsTool,
@@ -56,16 +66,6 @@ from llama_stack_api.apis.agents.openai_responses import (
     OpenAIResponseUsageOutputTokensDetails,
     WebSearchToolTypes,
 )
-from llama_stack_api.apis.inference import (
-    Inference,
-    OpenAIAssistantMessageParam,
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChatCompletionRequestWithExtraBody,
-    OpenAIChatCompletionToolCall,
-    OpenAIChoice,
-    OpenAIMessageParam,
-)
 from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
@@ -1024,7 +1024,7 @@ class StreamingResponseOrchestrator:
         self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput]
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         """Process all tools and emit appropriate streaming events."""
-        from llama_stack_api.apis.tools import ToolDef
+        from llama_stack_api.tools import ToolDef
         from openai.types.chat import ChatCompletionToolParam
         from llama_stack.models.llama.datatypes import ToolDefinition
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
index ca3d8e15b..41be0969c 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
@@ -9,7 +9,18 @@ import json
 from collections.abc import AsyncIterator
 from typing import Any
-from llama_stack_api.apis.agents.openai_responses import (
+from llama_stack_api.common.content_types import (
+    ImageContentItem,
+    TextContentItem,
+)
+from llama_stack_api.inference import (
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+    OpenAIChatCompletionToolCall,
+    OpenAIImageURL,
+    OpenAIToolMessageParam,
+)
+from llama_stack_api.openai_responses import (
     OpenAIResponseInputToolFileSearch,
     OpenAIResponseInputToolMCP,
     OpenAIResponseObjectStreamResponseFileSearchCallCompleted,
@@ -25,19 +36,8 @@ from llama_stack_api.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageFileSearchToolCallResults,
     OpenAIResponseOutputMessageWebSearchToolCall,
 )
-from llama_stack_api.apis.common.content_types import (
-    ImageContentItem,
-    TextContentItem,
-)
-from llama_stack_api.apis.inference import (
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-    OpenAIChatCompletionToolCall,
-    OpenAIImageURL,
-    OpenAIToolMessageParam,
-)
-from llama_stack_api.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
-from llama_stack_api.apis.vector_io import VectorIO
+from llama_stack_api.tools import ToolGroups, ToolInvocationResult, ToolRuntime
+from llama_stack_api.vector_io import VectorIO
 from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
@@ -398,7 +398,7 @@ class ToolExecutor:
         # Build output message
         message: Any
         if mcp_tool_to_server and function.name in mcp_tool_to_server:
-            from llama_stack_api.apis.agents.openai_responses import (
+            from llama_stack_api.openai_responses import (
                 OpenAIResponseOutputMessageMCPCall,
             )
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
index 44f875851..8fa8cdb9d 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
@@ -7,7 +7,8 @@ from dataclasses import dataclass
 from typing import cast
-from llama_stack_api.apis.agents.openai_responses import (
+from llama_stack_api.inference import OpenAIChatCompletionToolCall, OpenAIMessageParam, OpenAIResponseFormatParam
+from llama_stack_api.openai_responses import (
     OpenAIResponseInput,
     OpenAIResponseInputTool,
     OpenAIResponseInputToolFileSearch,
@@ -23,7 +24,6 @@ from llama_stack_api.apis.agents.openai_responses import (
     OpenAIResponseTool,
     OpenAIResponseToolMCP,
 )
-from llama_stack_api.apis.inference import OpenAIChatCompletionToolCall, OpenAIMessageParam, OpenAIResponseFormatParam
 from openai.types.chat import ChatCompletionToolParam
 from pydantic import BaseModel
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
index 68b72b4cc..3ccf489f2 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
@@ -9,26 +9,8 @@ import re
 import uuid
 from collections.abc import Sequence
-from llama_stack_api.apis.agents.agents import ResponseGuardrailSpec
-from llama_stack_api.apis.agents.openai_responses import (
-    OpenAIResponseAnnotationFileCitation,
-    OpenAIResponseInput,
-    OpenAIResponseInputFunctionToolCallOutput,
-    OpenAIResponseInputMessageContent,
-    OpenAIResponseInputMessageContentImage,
-    OpenAIResponseInputMessageContentText,
-    OpenAIResponseInputTool,
-    OpenAIResponseMCPApprovalRequest,
-    OpenAIResponseMCPApprovalResponse,
-    OpenAIResponseMessage,
-    OpenAIResponseOutputMessageContent,
-    OpenAIResponseOutputMessageContentOutputText,
-    OpenAIResponseOutputMessageFunctionToolCall,
-    OpenAIResponseOutputMessageMCPCall,
-    OpenAIResponseOutputMessageMCPListTools,
-    OpenAIResponseText,
-)
-from llama_stack_api.apis.inference import (
+from llama_stack_api.agents import ResponseGuardrailSpec
+from llama_stack_api.inference import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletionContentPartImageParam,
     OpenAIChatCompletionContentPartParam,
@@ -48,7 +30,25 @@ from llama_stack_api.apis.inference import (
     OpenAIToolMessageParam,
     OpenAIUserMessageParam,
 )
-from llama_stack_api.apis.safety import Safety
+from llama_stack_api.openai_responses import (
+    OpenAIResponseAnnotationFileCitation,
+    OpenAIResponseInput,
+    OpenAIResponseInputFunctionToolCallOutput,
+    OpenAIResponseInputMessageContent,
+    OpenAIResponseInputMessageContentImage,
+    OpenAIResponseInputMessageContentText,
+    OpenAIResponseInputTool,
+    OpenAIResponseMCPApprovalRequest,
+    OpenAIResponseMCPApprovalResponse,
+    OpenAIResponseMessage,
+    OpenAIResponseOutputMessageContent,
+    OpenAIResponseOutputMessageContentOutputText,
+    OpenAIResponseOutputMessageFunctionToolCall,
+    OpenAIResponseOutputMessageMCPCall,
+    OpenAIResponseOutputMessageMCPListTools,
+    OpenAIResponseText,
+)
+from llama_stack_api.safety import Safety
 async def convert_chat_choice_to_response_message(
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
index c6326a103..78cea0864 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/safety.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
@@ -6,8 +6,8 @@ import asyncio
-from llama_stack_api.apis.inference import OpenAIMessageParam
-from llama_stack_api.apis.safety import Safety, SafetyViolation, ViolationLevel
+from llama_stack_api.inference import OpenAIMessageParam
+from llama_stack_api.safety import Safety, SafetyViolation, ViolationLevel
 from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/inline/batches/reference/__init__.py b/src/llama_stack/providers/inline/batches/reference/__init__.py
index 80e0baf31..8b905a9e8 100644
--- a/src/llama_stack/providers/inline/batches/reference/__init__.py
+++ b/src/llama_stack/providers/inline/batches/reference/__init__.py
@@ -6,9 +6,9 @@ from typing import Any
-from llama_stack_api.apis.files import Files
-from llama_stack_api.apis.inference import Inference
-from llama_stack_api.apis.models import Models
+from llama_stack_api.files import Files
+from llama_stack_api.inference import Inference
+from llama_stack_api.models import Models
 from llama_stack.core.datatypes import AccessRule, Api
 from llama_stack.providers.utils.kvstore import kvstore_impl
diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py
index 3aa276b96..109643d78 100644
--- a/src/llama_stack/providers/inline/batches/reference/batches.py
+++ b/src/llama_stack/providers/inline/batches/reference/batches.py
@@ -13,10 +13,10 @@ import uuid
 from io import BytesIO
 from typing import Any, Literal
-from llama_stack_api.apis.batches import Batches, BatchObject, ListBatchesResponse
-from llama_stack_api.apis.common.errors import ConflictError, ResourceNotFoundError
-from llama_stack_api.apis.files import Files, OpenAIFilePurpose
-from llama_stack_api.apis.inference import (
+from llama_stack_api.batches import Batches, BatchObject, ListBatchesResponse
+from llama_stack_api.common.errors import ConflictError, ResourceNotFoundError
+from llama_stack_api.files import Files, OpenAIFilePurpose
+from llama_stack_api.inference import (
     Inference,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -28,7 +28,7 @@ from llama_stack_api.apis.inference import (
     OpenAIToolMessageParam,
     OpenAIUserMessageParam,
 )
-from llama_stack_api.apis.models import Models
+from llama_stack_api.models import Models
 from openai.types.batch import BatchError, Errors
 from pydantic import BaseModel
diff --git a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
index d5799e486..d18b5a449 100644
--- a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
+++ b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
@@ -5,10 +5,10 @@
 # the root directory of this source tree.
 from typing import Any
-from llama_stack_api.apis.common.responses import PaginatedResponse
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Dataset
-from llama_stack_api.providers.datatypes import DatasetsProtocolPrivate
+from llama_stack_api.common.responses import PaginatedResponse
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Dataset
+from llama_stack_api.datatypes import DatasetsProtocolPrivate
 from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri
 from llama_stack.providers.utils.kvstore import kvstore_impl
diff --git a/src/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
index 6d6b5eafc..3c78a1a08 100644
--- a/src/llama_stack/providers/inline/eval/meta_reference/eval.py
+++ b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
@@ -6,21 +6,21 @@ import json
 from typing import Any
-from llama_stack_api.apis.agents import Agents
-from llama_stack_api.apis.benchmarks import Benchmark
-from llama_stack_api.apis.common.job_types import Job, JobStatus
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
-from llama_stack_api.apis.eval import BenchmarkConfig, Eval, EvaluateResponse
-from llama_stack_api.apis.inference import (
+from llama_stack_api.agents import Agents
+from llama_stack_api.benchmarks import Benchmark
+from llama_stack_api.common.job_types import Job, JobStatus
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Datasets
+from llama_stack_api.datatypes import BenchmarksProtocolPrivate
+from llama_stack_api.eval import BenchmarkConfig, Eval, EvaluateResponse
+from llama_stack_api.inference import (
     Inference,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletionRequestWithExtraBody,
     OpenAISystemMessageParam,
     OpenAIUserMessageParam,
 )
-from llama_stack_api.apis.scoring import Scoring
-from llama_stack_api.providers.datatypes import BenchmarksProtocolPrivate
+from llama_stack_api.scoring import Scoring
 from tqdm import tqdm
 from llama_stack.providers.utils.common.data_schema_validator import ColumnName
diff --git a/src/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py
index 9770a3527..0e34cd64a 100644
--- a/src/llama_stack/providers/inline/files/localfs/files.py
+++ b/src/llama_stack/providers/inline/files/localfs/files.py
@@ -10,9 +10,9 @@ from pathlib import Path
 from typing import Annotated
 from fastapi import Depends, File, Form, Response, UploadFile
-from llama_stack_api.apis.common.errors import ResourceNotFoundError
-from llama_stack_api.apis.common.responses import Order
-from llama_stack_api.apis.files import (
+from llama_stack_api.common.errors import ResourceNotFoundError
+from llama_stack_api.common.responses import Order
+from llama_stack_api.files import (
     ExpiresAfter,
     Files,
     ListOpenAIFileResponse,
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py
index 1a5e9f1ba..caae17fc0 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/config.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/config.py
@@ -6,7 +6,7 @@ from typing import Any
-from llama_stack_api.apis.inference import QuantizationConfig
+from llama_stack_api.inference import QuantizationConfig
 from pydantic import BaseModel, field_validator
 from llama_stack.providers.utils.inference import supported_inference_models
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
index bda9304e8..21e96c0a9 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/generators.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
@@ -8,7 +8,7 @@ import math
 from typing import Optional
 import torch
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
     GreedySamplingStrategy,
     JsonSchemaResponseFormat,
     OpenAIChatCompletionRequestWithExtraBody,
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
index e75d210df..940992c0c 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -9,23 +9,21 @@ import time
 import uuid
 from collections.abc import AsyncIterator
-from llama_stack_api.apis.inference import (
+from llama_stack_api.datatypes import ModelsProtocolPrivate
+from llama_stack_api.inference import (
     InferenceProvider,
     OpenAIAssistantMessageParam,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAIChatCompletionUsage,
     OpenAIChoice,
+    OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIUserMessageParam,
     ToolChoice,
 )
-from llama_stack_api.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-)
-from llama_stack_api.apis.models import Model, ModelType
-from llama_stack_api.providers.datatypes import ModelsProtocolPrivate
+from llama_stack_api.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import RawMessage, RawTextItem, ToolDefinition
@@ -377,7 +375,7 @@ class MetaReferenceInferenceImpl(
         # Convert tool calls to OpenAI format
         openai_tool_calls = None
         if decoded_message.tool_calls:
-            from llama_stack_api.apis.inference import (
+            from llama_stack_api.inference import (
                 OpenAIChatCompletionToolCall,
                OpenAIChatCompletionToolCallFunction,
             )
@@ -442,7 +440,7 @@ class MetaReferenceInferenceImpl(
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> AsyncIterator[OpenAIChatCompletionChunk]:
         """Stream chat completion chunks as they're generated."""
-        from llama_stack_api.apis.inference import (
+        from llama_stack_api.inference import (
             OpenAIChatCompletionChunk,
             OpenAIChatCompletionToolCall,
             OpenAIChatCompletionToolCallFunction,
diff --git a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
index 9e6a81543..946849223 100644
--- a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
+++ b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
@@ -6,18 +6,16 @@ from collections.abc import AsyncIterator
-from llama_stack_api.apis.inference import (
+from llama_stack_api.datatypes import ModelsProtocolPrivate
+from llama_stack_api.inference import (
     InferenceProvider,
-    OpenAIChatCompletionRequestWithExtraBody,
-    OpenAICompletionRequestWithExtraBody,
-)
-from llama_stack_api.apis.inference.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletion,
+    OpenAICompletionRequestWithExtraBody,
 )
-from llama_stack_api.apis.models import ModelType
-from llama_stack_api.providers.datatypes import Model, ModelsProtocolPrivate
+from llama_stack_api.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.embedding_mixin import (
diff --git a/src/llama_stack/providers/inline/post_training/common/validator.py b/src/llama_stack/providers/inline/post_training/common/validator.py
index 461a05733..072dfcc9d 100644
--- a/src/llama_stack/providers/inline/post_training/common/validator.py
+++ b/src/llama_stack/providers/inline/post_training/common/validator.py
@@ -12,7 +12,7 @@ from typing import Any
-from llama_stack_api.apis.common.type_system import (
+from llama_stack_api.common.type_system import (
     ChatCompletionInputType,
     DialogType,
     StringType,
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
index 1324ec6e5..37eb9973b 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
@@ -6,9 +6,9 @@ from enum import Enum
 from typing import Any
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
-from llama_stack_api.apis.post_training import (
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Datasets
+from llama_stack_api.post_training import (
     AlgorithmConfig,
     Checkpoint,
     DPOAlignmentConfig,
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
index dd8e7ab80..bc9fb3b85 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
@@ -12,9 +12,9 @@ from typing import Any
 import torch
 from datasets import Dataset
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
-from llama_stack_api.apis.post_training import (
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Datasets
+from llama_stack_api.post_training import (
     Checkpoint,
     DataConfig,
     LoraFinetuningConfig,
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
index cd21abb1f..45cfe2e8d 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
@@ -11,9 +11,9 @@ from typing import Any
 import torch
 from datasets import Dataset
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
-from llama_stack_api.apis.post_training import (
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Datasets
+from llama_stack_api.post_training import (
     Checkpoint,
     DPOAlignmentConfig,
     TrainingConfig,
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/utils.py b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
index 1da08cc75..649d26f11 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/utils.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
@@ -14,8 +14,8 @@ from typing import TYPE_CHECKING, Any, Protocol
 import psutil
 import torch
 from datasets import Dataset
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.post_training import Checkpoint, TrainingConfig
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.post_training import Checkpoint, TrainingConfig
 from transformers import AutoConfig, AutoModelForCausalLM
 if TYPE_CHECKING:
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
index 83d580180..d18d45575 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
@@ -13,7 +13,7 @@ from collections.abc import Callable
 import torch
-from llama_stack_api.apis.post_training import DatasetFormat
+from llama_stack_api.post_training import DatasetFormat
 from pydantic import BaseModel
 from torchtune.data._messages import InputOutputToMessages, ShareGPTToMessages
 from torchtune.models.llama3 import llama3_tokenizer
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
index db0c5fb12..2cb01ed41 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
@@ -6,9 +6,9 @@ from enum import Enum
 from typing import Any
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
-from llama_stack_api.apis.post_training import (
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Datasets
+from llama_stack_api.post_training import (
     AlgorithmConfig,
     Checkpoint,
     DPOAlignmentConfig,
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
index 49d9cba5f..bf221c513 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
@@ -12,10 +12,10 @@ from pathlib import Path
 from typing import Any
 import torch
-from llama_stack_api.apis.common.training_types import PostTrainingMetric
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
-from llama_stack_api.apis.post_training import (
+from llama_stack_api.common.training_types import PostTrainingMetric
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Datasets
+from llama_stack_api.post_training import (
     Checkpoint,
     DataConfig,
     LoraFinetuningConfig,
diff --git a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
index 9de7e7450..1732c6bae 100644
--- a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
+++ b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
@@ -10,15 +10,16 @@ from typing import TYPE_CHECKING, Any
 if TYPE_CHECKING:
     from codeshield.cs import CodeShieldScanResult
-from llama_stack_api.apis.inference import OpenAIMessageParam
-from llama_stack_api.apis.safety import (
+from llama_stack_api.inference import OpenAIMessageParam
+from llama_stack_api.safety import (
+    ModerationObject,
+    ModerationObjectResults,
     RunShieldResponse,
     Safety,
     SafetyViolation,
     ViolationLevel,
 )
-from llama_stack_api.apis.safety.safety import ModerationObject, ModerationObjectResults
-from llama_stack_api.apis.shields import Shield
+from llama_stack_api.shields import Shield
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.prompt_adapter import (
diff --git a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
index 44d59aff3..beaba4572 100644
--- a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
+++ b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
@@ -9,22 +9,23 @@ import uuid
 from string import Template
 from typing import Any
-from llama_stack_api.apis.common.content_types import ImageContentItem, TextContentItem
-from llama_stack_api.apis.inference import (
+from llama_stack_api.common.content_types import ImageContentItem, TextContentItem
+from llama_stack_api.datatypes import ShieldsProtocolPrivate
+from llama_stack_api.inference import (
     Inference,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAIMessageParam,
     OpenAIUserMessageParam,
 )
-from llama_stack_api.apis.safety import (
+from llama_stack_api.safety import (
+    ModerationObject,
+    ModerationObjectResults,
     RunShieldResponse,
     Safety,
     SafetyViolation,
     ViolationLevel,
 )
-from llama_stack_api.apis.safety.safety import ModerationObject, ModerationObjectResults
-from llama_stack_api.apis.shields import Shield
-from llama_stack_api.providers.datatypes import ShieldsProtocolPrivate
+from llama_stack_api.shields import Shield
 from llama_stack.core.datatypes import Api
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
index e533b78f5..531972478 100644
a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +++ b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py @@ -7,17 +7,17 @@ from typing import Any import torch -from llama_stack_api.apis.inference import OpenAIMessageParam -from llama_stack_api.apis.safety import ( +from llama_stack_api.datatypes import ShieldsProtocolPrivate +from llama_stack_api.inference import OpenAIMessageParam +from llama_stack_api.safety import ( + ModerationObject, RunShieldResponse, Safety, SafetyViolation, ShieldStore, ViolationLevel, ) -from llama_stack_api.apis.safety.safety import ModerationObject -from llama_stack_api.apis.shields import Shield -from llama_stack_api.providers.datatypes import ShieldsProtocolPrivate +from llama_stack_api.shields import Shield from transformers import AutoModelForSequenceClassification, AutoTokenizer from llama_stack.core.utils.model_utils import model_local_dir diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring.py b/src/llama_stack/providers/inline/scoring/basic/scoring.py index 9be889d52..d2e2aea8d 100644 --- a/src/llama_stack/providers/inline/scoring/basic/scoring.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring.py @@ -5,16 +5,16 @@ # the root directory of this source tree. from typing import Any -from llama_stack_api.apis.datasetio import DatasetIO -from llama_stack_api.apis.datasets import Datasets -from llama_stack_api.apis.scoring import ( +from llama_stack_api.datasetio import DatasetIO +from llama_stack_api.datasets import Datasets +from llama_stack_api.datatypes import ScoringFunctionsProtocolPrivate +from llama_stack_api.scoring import ( ScoreBatchResponse, ScoreResponse, Scoring, ScoringResult, ) -from llama_stack_api.apis.scoring_functions import ScoringFn, ScoringFnParams -from llama_stack_api.providers.datatypes import ScoringFunctionsProtocolPrivate +from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams from llama_stack.core.datatypes import Api from llama_stack.providers.utils.common.data_schema_validator import ( diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py index 3b9f6a98f..d5525b27e 100644 --- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py @@ -8,8 +8,8 @@ import json import re from typing import Any -from llama_stack_api.apis.scoring import ScoringResultRow -from llama_stack_api.apis.scoring_functions import ScoringFnParams +from llama_stack_api.scoring import ScoringResultRow +from llama_stack_api.scoring_functions import ScoringFnParams from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py index 883dd5c89..fd691af59 100644 --- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py @@ -6,8 +6,8 @@ from typing import Any -from llama_stack_api.apis.scoring import ScoringResultRow -from llama_stack_api.apis.scoring_functions import ScoringFnParams +from llama_stack_api.scoring import ScoringResultRow +from llama_stack_api.scoring_functions import ScoringFnParams from 
llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py index 917fcfbe1..af1af88a1 100644 --- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py @@ -4,8 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api.apis.common.type_system import NumberType -from llama_stack_api.apis.scoring_functions import ( +from llama_stack_api.common.type_system import NumberType +from llama_stack_api.scoring_functions import ( AggregationFunctionType, BasicScoringFnParams, ScoringFn, diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py index f234bd1ad..34c4ee0ef 100644 --- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py @@ -4,8 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api.apis.common.type_system import NumberType -from llama_stack_api.apis.scoring_functions import ( +from llama_stack_api.common.type_system import NumberType +from llama_stack_api.scoring_functions import ( AggregationFunctionType, BasicScoringFnParams, ScoringFn, diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py index 108d1c5d5..8710ea01d 100644 --- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py @@ -4,8 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api.apis.common.type_system import NumberType -from llama_stack_api.apis.scoring_functions import ( +from llama_stack_api.common.type_system import NumberType +from llama_stack_api.scoring_functions import ( AggregationFunctionType, BasicScoringFnParams, ScoringFn, diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py index 6b4c7f96e..34286927d 100644 --- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py @@ -4,8 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
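The hunks in this patch are almost entirely mechanical: every `llama_stack_api.apis.<module>` import flattens to `llama_stack_api.<module>`, doubled paths such as `apis.safety.safety` collapse to the bare module, and provider-facing types move from `llama_stack_api.providers.datatypes` into `llama_stack_api.datatypes`. A minimal sketch of a script that applies the same rewrite is below; it is an illustration under those assumptions, not the tool that produced this commit.

```python
# Sketch of the import flattening seen throughout this patch. A few moves
# were not purely mechanical (e.g. the RAG types leaving
# llama_stack_api.apis.tools for llama_stack_api.rag_tool), so the special
# cases run before the generic rule. The "src" root is an assumption.
import re
from pathlib import Path

REWRITES = [
    # provider-facing datatypes merged into the top-level datatypes module
    (re.compile(r"\bllama_stack_api\.providers\.datatypes\b"), "llama_stack_api.datatypes"),
    # rag_tool hoisted out of the tools package
    (re.compile(r"\bllama_stack_api\.apis\.tools\.rag_tool\b"), "llama_stack_api.rag_tool"),
    # doubled paths like apis.inference.inference collapse to the module
    (re.compile(r"\bllama_stack_api\.apis\.(\w+)\.\1\b"), r"llama_stack_api.\1"),
    # everything else simply drops the "apis" segment
    (re.compile(r"\bllama_stack_api\.apis\."), "llama_stack_api."),
]


def migrate(path: Path) -> None:
    text = original = path.read_text()
    for pattern, replacement in REWRITES:
        text = pattern.sub(replacement, text)
    if text != original:
        path.write_text(text)


# Docs and READMEs in this patch saw the same substitution; only .py shown here.
for source in Path("src").rglob("*.py"):
    migrate(source)
```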
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
index 6b4c7f96e..34286927d 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     RegexParserScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
index 0d69cb9f1..3699ed93a 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     RegexParserScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
index cb6d64717..b05923a4c 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     BasicScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
index 4dcff6a5c..232bd36b0 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
@@ -6,8 +6,8 @@ from typing import Any
-from llama_stack_api.apis.scoring import ScoringResultRow
-from llama_stack_api.apis.scoring_functions import ScoringFnParams
+from llama_stack_api.scoring import ScoringResultRow
+from llama_stack_api.scoring_functions import ScoringFnParams
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
index 3d3511cc8..95892963f 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
@@ -5,8 +5,8 @@
 # the root directory of this source tree.
 from typing import Any
-from llama_stack_api.apis.scoring import ScoringResultRow
-from llama_stack_api.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType
+from llama_stack_api.scoring import ScoringResultRow
+from llama_stack_api.scoring_functions import ScoringFnParams, ScoringFnParamsType
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
index 4473fbc3d..6bb5bf118 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
@@ -6,8 +6,8 @@ import re
 from typing import Any
-from llama_stack_api.apis.scoring import ScoringResultRow
-from llama_stack_api.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType
+from llama_stack_api.scoring import ScoringResultRow
+from llama_stack_api.scoring_functions import ScoringFnParams, ScoringFnParamsType
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
index b3ed4a488..a2c8140c6 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
@@ -6,8 +6,8 @@ from typing import Any
-from llama_stack_api.apis.scoring import ScoringResultRow
-from llama_stack_api.apis.scoring_functions import ScoringFnParams
+from llama_stack_api.scoring import ScoringResultRow
+from llama_stack_api.scoring_functions import ScoringFnParams
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
index 2a689bd6c..4ed8f95aa 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
@@ -17,17 +17,17 @@ from autoevals.ragas import (
     ContextRelevancy,
     Faithfulness,
 )
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
-from llama_stack_api.apis.scoring import (
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Datasets
+from llama_stack_api.datatypes import ScoringFunctionsProtocolPrivate
+from llama_stack_api.scoring import (
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
     ScoringResult,
     ScoringResultRow,
 )
-from llama_stack_api.apis.scoring_functions import ScoringFn, ScoringFnParams
-from llama_stack_api.providers.datatypes import ScoringFunctionsProtocolPrivate
+from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams
 from pydantic import BaseModel
 from llama_stack.core.datatypes import Api
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
index 020a09a58..c0e769fe3 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     BasicScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
index 83385cc3d..3479473e1 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     BasicScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
index edd2e72cb..0c0e4a4a0 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     BasicScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
index 851facab6..0ecfb2adb 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     BasicScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
index ca967312a..bc4bfb999 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     BasicScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
index 230e6b77d..9af908675 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     BasicScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
index 220f59975..267f2ca6c 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     BasicScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
index f9ba0d80f..e71558618 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     BasicScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
index 1ba91be22..ffd242b61 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     BasicScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
index 663ea04f3..c53170643 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
@@ -5,17 +5,17 @@
 # the root directory of this source tree.
 from typing import Any
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
-from llama_stack_api.apis.inference import Inference
-from llama_stack_api.apis.scoring import (
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Datasets
+from llama_stack_api.datatypes import ScoringFunctionsProtocolPrivate
+from llama_stack_api.inference import Inference
+from llama_stack_api.scoring import (
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
     ScoringResult,
 )
-from llama_stack_api.apis.scoring_functions import ScoringFn, ScoringFnParams
-from llama_stack_api.providers.datatypes import ScoringFunctionsProtocolPrivate
+from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams
 from llama_stack.core.datatypes import Api
 from llama_stack.providers.utils.common.data_schema_validator import (
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
index 03127de57..47c3a4e4e 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import (
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import (
     AggregationFunctionType,
     LLMAsJudgeScoringFnParams,
     ScoringFn,
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
index 7d1a387bd..7e7c69b16 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
@@ -4,8 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.scoring_functions import LLMAsJudgeScoringFnParams, ScoringFn
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.scoring_functions import LLMAsJudgeScoringFnParams, ScoringFn
 llm_as_judge_base = ScoringFn(
     identifier="llm-as-judge::base",
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
index fb79c27a1..de0d15b69 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
@@ -6,9 +6,9 @@ import re
 from typing import Any
-from llama_stack_api.apis.inference import Inference, OpenAIChatCompletionRequestWithExtraBody
-from llama_stack_api.apis.scoring import ScoringResultRow
-from llama_stack_api.apis.scoring_functions import ScoringFnParams
+from llama_stack_api.inference import Inference, OpenAIChatCompletionRequestWithExtraBody
+from llama_stack_api.scoring import ScoringResultRow
+from llama_stack_api.scoring_functions import ScoringFnParams
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
index 0dae5034c..e958f1112 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.datatypes import Api
 from .config import RagToolRuntimeConfig
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
index 6d5b0be57..b2f0d884b 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
@@ -6,9 +6,9 @@ from jinja2 import Template
-from llama_stack_api.apis.common.content_types import InterleavedContent
-from llama_stack_api.apis.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
-from llama_stack_api.apis.tools.rag_tool import (
+from llama_stack_api.common.content_types import InterleavedContent
+from llama_stack_api.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
+from llama_stack_api.rag_tool import (
     DefaultRAGQueryGeneratorConfig,
     LLMRAGQueryGeneratorConfig,
     RAGQueryGenerator,
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
index c7098dd98..9a5dc63b7 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -12,31 +12,29 @@ from typing import Any
 import httpx
 from fastapi import UploadFile
-from llama_stack_api.apis.common.content_types import (
+from llama_stack_api.common.content_types import (
     URL,
     InterleavedContent,
     InterleavedContentItem,
     TextContentItem,
 )
-from llama_stack_api.apis.files import Files, OpenAIFilePurpose
-from llama_stack_api.apis.inference import Inference
-from llama_stack_api.apis.tools import (
+from llama_stack_api.datatypes import ToolGroupsProtocolPrivate
+from llama_stack_api.files import Files, OpenAIFilePurpose
+from llama_stack_api.inference import Inference
+from llama_stack_api.rag_tool import RAGDocument, RAGQueryConfig, RAGQueryResult
+from llama_stack_api.tools import (
     ListToolDefsResponse,
-    RAGDocument,
-    RAGQueryConfig,
-    RAGQueryResult,
     ToolDef,
     ToolGroup,
     ToolInvocationResult,
     ToolRuntime,
 )
-from llama_stack_api.apis.vector_io import (
+from llama_stack_api.vector_io import (
     QueryChunksResponse,
     VectorIO,
     VectorStoreChunkingStrategyStatic,
     VectorStoreChunkingStrategyStaticConfig,
 )
-from llama_stack_api.providers.datatypes import ToolGroupsProtocolPrivate
 from pydantic import TypeAdapter
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/inline/vector_io/chroma/__init__.py b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
index baf08624c..a99c94012 100644
--- a/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.datatypes import Api
 from .config import ChromaVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
index 8d250b384..6c7a4efd8 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.datatypes import Api
 from .config import FaissVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
index 3fc4cdfc0..4c5362a8d 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -12,12 +12,12 @@ from typing import Any
 import faiss  # type: ignore[import-untyped]
 import numpy as np
-from llama_stack_api.apis.common.errors import VectorStoreNotFoundError
-from llama_stack_api.apis.files import Files
-from llama_stack_api.apis.inference import Inference, InterleavedContent
-from llama_stack_api.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack_api.apis.vector_stores import VectorStore
-from llama_stack_api.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
+from llama_stack_api.common.errors import VectorStoreNotFoundError
+from llama_stack_api.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
+from llama_stack_api.files import Files
+from llama_stack_api.inference import Inference, InterleavedContent
+from llama_stack_api.vector_io import Chunk, QueryChunksResponse, VectorIO
+from llama_stack_api.vector_stores import VectorStore
 from numpy.typing import NDArray
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/inline/vector_io/milvus/__init__.py b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
index abd3d5a2b..4aa9db7ec 100644
--- a/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.datatypes import Api
 from .config import MilvusVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
index 537b8d3a0..f0caeeda6 100644
--- a/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.datatypes import Api
 from .config import QdrantVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
index cd1ce4f82..acaf24eb9 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.datatypes import Api
 from .config import SQLiteVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index 6377b5597..4a5d90f9a 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -12,12 +12,12 @@ from typing import Any
 import numpy as np
 import sqlite_vec  # type: ignore[import-untyped]
-from llama_stack_api.apis.common.errors import VectorStoreNotFoundError
-from llama_stack_api.apis.files import Files
-from llama_stack_api.apis.inference import Inference
-from llama_stack_api.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack_api.apis.vector_stores import VectorStore
-from llama_stack_api.providers.datatypes import VectorStoresProtocolPrivate
+from llama_stack_api.common.errors import VectorStoreNotFoundError
+from llama_stack_api.datatypes import VectorStoresProtocolPrivate
+from llama_stack_api.files import Files
+from llama_stack_api.inference import Inference
+from llama_stack_api.vector_io import Chunk, QueryChunksResponse, VectorIO
+from llama_stack_api.vector_stores import VectorStore
 from numpy.typing import NDArray
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py
index e71c28620..824741ed1 100644
--- a/src/llama_stack/providers/registry/agents.py
+++ b/src/llama_stack/providers/registry/agents.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
-from llama_stack_api.providers.datatypes import (
+from llama_stack_api.datatypes import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
diff --git a/src/llama_stack/providers/registry/batches.py b/src/llama_stack/providers/registry/batches.py
index 859e72d34..0f64d4a48 100644
--- a/src/llama_stack/providers/registry/batches.py
+++ b/src/llama_stack/providers/registry/batches.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
-from llama_stack_api.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api.datatypes import Api, InlineProviderSpec, ProviderSpec
 def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/datasetio.py b/src/llama_stack/providers/registry/datasetio.py
index 2794d078f..fd8055cbb 100644
--- a/src/llama_stack/providers/registry/datasetio.py
+++ b/src/llama_stack/providers/registry/datasetio.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
-from llama_stack_api.providers.datatypes import (
+from llama_stack_api.datatypes import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
diff --git a/src/llama_stack/providers/registry/eval.py b/src/llama_stack/providers/registry/eval.py
index 52b0eb9da..5fe973b33 100644
--- a/src/llama_stack/providers/registry/eval.py
+++ b/src/llama_stack/providers/registry/eval.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
-from llama_stack_api.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack_api.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
 def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py
index 2f3b716d7..d1e7c8fba 100644
--- a/src/llama_stack/providers/registry/files.py
+++ b/src/llama_stack/providers/registry/files.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack_api.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
 from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages
diff --git a/src/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py
index 939899b35..1a4efd49e 100644
--- a/src/llama_stack/providers/registry/inference.py
+++ b/src/llama_stack/providers/registry/inference.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
-from llama_stack_api.providers.datatypes import (
+from llama_stack_api.datatypes import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
diff --git a/src/llama_stack/providers/registry/post_training.py b/src/llama_stack/providers/registry/post_training.py
index 18a3fc64a..6b0ad3ecd 100644
--- a/src/llama_stack/providers/registry/post_training.py
+++ b/src/llama_stack/providers/registry/post_training.py
@@ -7,7 +7,7 @@ from typing import cast
-from llama_stack_api.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack_api.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
 # We provide two versions of these providers so that distributions can package the appropriate version of torch.
 # The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
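The comment in the post-training registry above describes a packaging choice: the same inline implementation is published under CPU and GPU flavours so a distribution can pin the torch build it wants. A minimal sketch of that shape is below; the provider-type names, pip pins, and config-class path are illustrative assumptions, not the registry's actual entries.

```python
# Sketch of a CPU/GPU provider split, assuming the flattened datatypes
# module this patch introduces. Names marked hypothetical are not taken
# from the real registry.
from llama_stack_api.datatypes import Api, InlineProviderSpec, ProviderSpec


def torchtune_variants() -> list[ProviderSpec]:
    module = "llama_stack.providers.inline.post_training.torchtune"
    config_class = f"{module}.config.TorchtunePostTrainingConfig"  # assumed path
    return [
        InlineProviderSpec(
            api=Api.post_training,
            provider_type="inline::torchtune-cpu",  # hypothetical name
            # CPU wheel keeps container images small
            pip_packages=["torchtune", "torch --index-url https://download.pytorch.org/whl/cpu"],
            module=module,
            config_class=config_class,
        ),
        InlineProviderSpec(
            api=Api.post_training,
            provider_type="inline::torchtune-gpu",  # hypothetical name
            pip_packages=["torchtune", "torch"],
            module=module,
            config_class=config_class,
        ),
    ]
```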
diff --git a/src/llama_stack/providers/registry/safety.py b/src/llama_stack/providers/registry/safety.py
index 65b49b7ca..307911ebf 100644
--- a/src/llama_stack/providers/registry/safety.py
+++ b/src/llama_stack/providers/registry/safety.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
-from llama_stack_api.providers.datatypes import (
+from llama_stack_api.datatypes import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
diff --git a/src/llama_stack/providers/registry/scoring.py b/src/llama_stack/providers/registry/scoring.py
index ee9992dfe..58679a97c 100644
--- a/src/llama_stack/providers/registry/scoring.py
+++ b/src/llama_stack/providers/registry/scoring.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
-from llama_stack_api.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api.datatypes import Api, InlineProviderSpec, ProviderSpec
 def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/tool_runtime.py b/src/llama_stack/providers/registry/tool_runtime.py
index 1c88c7766..7b09881d3 100644
--- a/src/llama_stack/providers/registry/tool_runtime.py
+++ b/src/llama_stack/providers/registry/tool_runtime.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
-from llama_stack_api.providers.datatypes import (
+from llama_stack_api.datatypes import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
diff --git a/src/llama_stack/providers/registry/vector_io.py b/src/llama_stack/providers/registry/vector_io.py
index b4d96a0c1..b079a3644 100644
--- a/src/llama_stack/providers/registry/vector_io.py
+++ b/src/llama_stack/providers/registry/vector_io.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
-from llama_stack_api.providers.datatypes import (
+from llama_stack_api.datatypes import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
@@ -244,7 +244,7 @@ Two ranker types are supported:
 Example using RAGQueryConfig with different search modes:
 ```python
-from llama_stack_api.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+from llama_stack_api.rag_tool import RAGQueryConfig, RRFRanker, WeightedRanker
 # Vector search
 config = RAGQueryConfig(mode="vector", max_chunks=5)
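The docstring hunk above only shows the vector-search line, but it imports `RRFRanker` and `WeightedRanker` for the hybrid mode. A short sketch of that usage follows; the field names track the documented `RAGQueryConfig`, while the numeric values are illustrative.

```python
# Hybrid-mode companions to the vector-search example in the docstring
# above. impact_factor and alpha values here are illustrative defaults.
from llama_stack_api.rag_tool import RAGQueryConfig, RRFRanker, WeightedRanker

# Fuse vector and keyword results with reciprocal rank fusion
rrf_config = RAGQueryConfig(
    mode="hybrid",
    max_chunks=5,
    ranker=RRFRanker(impact_factor=60.0),
)

# Blend scores linearly; higher alpha leans toward vector similarity
weighted_config = RAGQueryConfig(
    mode="hybrid",
    max_chunks=5,
    ranker=WeightedRanker(alpha=0.7),
)
```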
diff --git a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
index 23e6a8719..03cfb2bac 100644
--- a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
+++ b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
@@ -6,10 +6,10 @@ from typing import Any
 from urllib.parse import parse_qs, urlparse
-from llama_stack_api.apis.common.responses import PaginatedResponse
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Dataset
-from llama_stack_api.providers.datatypes import DatasetsProtocolPrivate
+from llama_stack_api.common.responses import PaginatedResponse
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Dataset
+from llama_stack_api.datatypes import DatasetsProtocolPrivate
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.pagination import paginate_records
diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
index 3b0878337..b222f9962 100644
--- a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
+++ b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
@@ -7,10 +7,10 @@ from typing import Any
 import aiohttp
-from llama_stack_api.apis.common.content_types import URL
-from llama_stack_api.apis.common.responses import PaginatedResponse
-from llama_stack_api.apis.common.type_system import ParamType
-from llama_stack_api.apis.datasets import Dataset
+from llama_stack_api.common.content_types import URL
+from llama_stack_api.common.responses import PaginatedResponse
+from llama_stack_api.common.type_system import ParamType
+from llama_stack_api.datasets import Dataset
 from .config import NvidiaDatasetIOConfig
diff --git a/src/llama_stack/providers/remote/eval/nvidia/eval.py b/src/llama_stack/providers/remote/eval/nvidia/eval.py
index f67bb5475..0b4113177 100644
--- a/src/llama_stack/providers/remote/eval/nvidia/eval.py
+++ b/src/llama_stack/providers/remote/eval/nvidia/eval.py
@@ -6,15 +6,15 @@ from typing import Any
 import requests
-from llama_stack_api.apis.agents import Agents
-from llama_stack_api.apis.benchmarks import Benchmark
-from llama_stack_api.apis.common.job_types import Job, JobStatus
-from llama_stack_api.apis.datasetio import DatasetIO
-from llama_stack_api.apis.datasets import Datasets
-from llama_stack_api.apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
-from llama_stack_api.apis.inference import Inference
-from llama_stack_api.apis.scoring import Scoring, ScoringResult
-from llama_stack_api.providers.datatypes import BenchmarksProtocolPrivate
+from llama_stack_api.agents import Agents
+from llama_stack_api.benchmarks import Benchmark
+from llama_stack_api.common.job_types import Job, JobStatus
+from llama_stack_api.datasetio import DatasetIO
+from llama_stack_api.datasets import Datasets
+from llama_stack_api.datatypes import BenchmarksProtocolPrivate
+from llama_stack_api.eval import BenchmarkConfig, Eval, EvaluateResponse
+from llama_stack_api.inference import Inference
+from llama_stack_api.scoring import Scoring, ScoringResult
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
diff --git a/src/llama_stack/providers/remote/files/openai/files.py b/src/llama_stack/providers/remote/files/openai/files.py
index cd10ee22a..3db553c40 100644
--- a/src/llama_stack/providers/remote/files/openai/files.py
+++ b/src/llama_stack/providers/remote/files/openai/files.py
@@ -8,9 +8,9 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 from fastapi import Depends, File, Form, Response, UploadFile
-from llama_stack_api.apis.common.errors import ResourceNotFoundError
-from llama_stack_api.apis.common.responses import Order
-from llama_stack_api.apis.files import (
+from llama_stack_api.common.errors import ResourceNotFoundError
+from llama_stack_api.common.responses import Order
+from llama_stack_api.files import (
     ExpiresAfter,
     Files,
     ListOpenAIFileResponse,
diff --git a/src/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py
index 1f5894f12..2a98bc620 100644
--- a/src/llama_stack/providers/remote/files/s3/files.py
+++ b/src/llama_stack/providers/remote/files/s3/files.py
@@ -17,9 +17,9 @@ from fastapi import Depends, File, Form, Response, UploadFile
 if TYPE_CHECKING:
     from mypy_boto3_s3.client import S3Client
-from llama_stack_api.apis.common.errors import ResourceNotFoundError
-from llama_stack_api.apis.common.responses import Order
-from llama_stack_api.apis.files import (
+from llama_stack_api.common.errors import ResourceNotFoundError
+from llama_stack_api.common.responses import Order
+from llama_stack_api.files import (
     ExpiresAfter,
     Files,
     ListOpenAIFileResponse,
diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 3b837cbd4..491cda76c 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -6,7 +6,7 @@ from collections.abc import AsyncIterator, Iterable
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
diff --git a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
index 0c5292dcd..90f7c550e 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -6,7 +6,7 @@ from urllib.parse import urljoin
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
diff --git a/src/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py
index 3685234fc..4f0327b4f 100644
--- a/src/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/src/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -7,7 +7,7 @@ from collections.abc import Iterable
 from databricks.sdk import WorkspaceClient
-from llama_stack_api.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
+from llama_stack_api.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py
index 0335a11c4..af3a567e0 100644
--- a/src/llama_stack/providers/remote/inference/gemini/gemini.py
+++ b/src/llama_stack/providers/remote/inference/gemini/gemini.py
@@ -6,7 +6,7 @@ from typing import Any
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index 3aa73a59f..61587bcd3 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.inference.inference import (
+from llama_stack_api.inference import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingsRequestWithExtraBody,
diff --git a/src/llama_stack/providers/remote/inference/nvidia/__init__.py b/src/llama_stack/providers/remote/inference/nvidia/__init__.py
index cb013f4c5..fb3ff090a 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/__init__.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.inference import Inference
+from llama_stack_api.inference import Inference
 from .config import NVIDIAConfig
diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
index dd9beca59..fc09f9448 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -8,15 +8,13 @@ from collections.abc import Iterable
 import aiohttp
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
     RerankData,
     RerankResponse,
 )
-from llama_stack_api.apis.inference.inference import (
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-)
-from llama_stack_api.apis.models import Model, ModelType
+from llama_stack_api.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/oci/__init__.py b/src/llama_stack/providers/remote/inference/oci/__init__.py
index 8fd801ec8..e21e64ee7 100644
--- a/src/llama_stack/providers/remote/inference/oci/__init__.py
+++ b/src/llama_stack/providers/remote/inference/oci/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.apis.inference import InferenceProvider
+from llama_stack_api.inference import InferenceProvider
 from .config import OCIConfig
diff --git a/src/llama_stack/providers/remote/inference/oci/oci.py b/src/llama_stack/providers/remote/inference/oci/oci.py
index 294ac2ebc..2dedfd434 100644
--- a/src/llama_stack/providers/remote/inference/oci/oci.py
+++ b/src/llama_stack/providers/remote/inference/oci/oci.py
@@ -10,11 +10,11 @@ from typing import Any
 import httpx
 import oci
-from llama_stack_api.apis.inference.inference import (
+from llama_stack_api.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.models import ModelType
 from oci.generative_ai.generative_ai_client import GenerativeAiClient
 from oci.generative_ai.models import ModelCollection
 from openai._base_client import DefaultAsyncHttpxClient
diff --git a/src/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py
index 4d5bc5656..35022eedf 100644
--- a/src/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/src/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -7,12 +7,12 @@
 import asyncio
-from llama_stack_api.apis.common.errors import UnsupportedModelError
-from llama_stack_api.apis.models import Model
-from llama_stack_api.providers.datatypes import (
+from llama_stack_api.common.errors import UnsupportedModelError
+from llama_stack_api.datatypes import (
     HealthResponse,
     HealthStatus,
 )
+from llama_stack_api.models import Model
 from ollama import AsyncClient as AsyncOllamaClient
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 71ae86efb..01f2b5619 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -6,7 +6,7 @@ from collections.abc import AsyncIterator
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
     Inference,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
@@ -16,7 +16,7 @@ from llama_stack_api.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack_api.apis.models import Model
+from llama_stack_api.models import Model
 from openai import AsyncOpenAI
 from llama_stack.core.request_headers import NeedsRequestProviderData
diff --git a/src/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py
index 7afc4d073..c2ea2c521 100644
--- a/src/llama_stack/providers/remote/inference/runpod/runpod.py
+++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py
@@ -6,7 +6,7 @@ from collections.abc import AsyncIterator
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py
index f444bc225..36684090d 100644
--- a/src/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -8,7 +8,7 @@ from collections.abc import Iterable
 from huggingface_hub import AsyncInferenceClient, HfApi
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
diff --git a/src/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py
index 728cb910c..06dbf26c7 100644
--- a/src/llama_stack/providers/remote/inference/together/together.py
+++ b/src/llama_stack/providers/remote/inference/together/together.py
@@ -8,12 +8,12 @@ from collections.abc import Iterable
 from typing import Any, cast
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
 )
-from llama_stack_api.apis.inference.inference import OpenAIEmbeddingUsage
-from llama_stack_api.apis.models import Model
+from llama_stack_api.models import Model
 from together import AsyncTogether  # type: ignore[import-untyped]
 from together.constants import BASE_URL  # type: ignore[import-untyped]
diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py
index 775599b50..24bde4a8d 100644
--- a/src/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -7,16 +7,16 @@ from collections.abc import AsyncIterator
 from urllib.parse import urljoin
 import httpx
-from llama_stack_api.apis.inference import (
+from llama_stack_api.datatypes import (
+    HealthResponse,
+    HealthStatus,
+)
+from llama_stack_api.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
     ToolChoice,
 )
-from llama_stack_api.providers.datatypes import (
-    HealthResponse,
-    HealthStatus,
-)
 from pydantic import ConfigDict
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
index a850bb65e..1b999ba09 100644
--- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -9,7 +9,7 @@ from typing import Any
 import litellm
 import requests
-from llama_stack_api.apis.inference.inference import (
+from llama_stack_api.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -19,8 +19,7 @@ from llama_stack_api.apis.inference.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack_api.apis.models import Model
-from llama_stack_api.apis.models.models import ModelType
+from llama_stack_api.models import Model, ModelType
 from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
@@ -238,7 +237,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         )
         # Convert response to OpenAI format
-        from llama_stack_api.apis.inference import OpenAIEmbeddingUsage
+        from llama_stack_api.inference import OpenAIEmbeddingUsage
         from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response
diff --git a/src/llama_stack/providers/remote/post_training/nvidia/README.md b/src/llama_stack/providers/remote/post_training/nvidia/README.md
index d16b31db5..f998f44ba 100644
--- a/src/llama_stack/providers/remote/post_training/nvidia/README.md
+++ b/src/llama_stack/providers/remote/post_training/nvidia/README.md
@@ -128,7 +128,7 @@ client.post_training.job.cancel(job_uuid="your-job-id")
 #### 1. Register the model
 ```python
-from llama_stack_api.apis.models import Model, ModelType
+from llama_stack_api.models import Model, ModelType
 client.models.register(
     model_id="test-example-model@v1",
diff --git a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
index 327859a4d..7fa1bd89c 100644
--- a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
+++ b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
@@ -8,7 +8,7 @@ from datetime import datetime
 from typing import Any, Literal
 import aiohttp
-from llama_stack_api.apis.post_training import (
+from llama_stack_api.post_training import (
     AlgorithmConfig,
     DPOAlignmentConfig,
     JobStatus,
diff --git a/src/llama_stack/providers/remote/post_training/nvidia/utils.py b/src/llama_stack/providers/remote/post_training/nvidia/utils.py
index 5187fdd34..b1f638f27 100644
--- a/src/llama_stack/providers/remote/post_training/nvidia/utils.py
+++ b/src/llama_stack/providers/remote/post_training/nvidia/utils.py
@@ -7,7 +7,7 @@ import warnings
 from typing import Any
-from llama_stack_api.apis.post_training import TrainingConfig
+from llama_stack_api.post_training import TrainingConfig
 from pydantic import BaseModel
 from llama_stack.log import get_logger
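The NVIDIA post-training README above registers a model and, per the hunk context, cancels jobs via `client.post_training.job.cancel`. A hypothetical continuation of that flow is sketched below; apart from `job.cancel`, the method and argument names are assumptions for illustration.

```python
# Hypothetical follow-on to the README's model registration: submit a
# fine-tuning job for the registered model, then manage it through the
# job API. Only job.cancel is taken from the hunk context above.
job = client.post_training.supervised_fine_tune(  # assumed method name
    job_uuid="",
    model="test-example-model@v1",
    checkpoint_dir="",
    training_config={"n_epochs": 1},  # illustrative values
    algorithm_config=None,
    hyperparam_search_config={},
    logger_config={},
)

status = client.post_training.job.status(job_uuid=job.job_uuid)  # assumed
client.post_training.job.cancel(job_uuid="your-job-id")
```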
-from llama_stack_api.apis.shields import Shield -from llama_stack_api.providers.datatypes import ShieldsProtocolPrivate +from llama_stack_api.shields import Shield from llama_stack.log import get_logger from llama_stack.providers.utils.bedrock.client import create_bedrock_client diff --git a/src/llama_stack/providers/remote/safety/nvidia/README.md b/src/llama_stack/providers/remote/safety/nvidia/README.md index ccb85e1b6..f3ec0f1e0 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/README.md +++ b/src/llama_stack/providers/remote/safety/nvidia/README.md @@ -42,8 +42,8 @@ client.initialize() #### Create a safety shield ```python -from llama_stack_api.apis.safety import Shield -from llama_stack_api.apis.inference import Message +from llama_stack_api.shields import Shield +from llama_stack_api.inference import Message # Create a safety shield shield = Shield( diff --git a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py index 31482fe3e..d40c26f77 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -7,10 +7,10 @@ from typing import Any import requests -from llama_stack_api.apis.inference import OpenAIMessageParam -from llama_stack_api.apis.safety import ModerationObject, RunShieldResponse, Safety, SafetyViolation, ViolationLevel -from llama_stack_api.apis.shields import Shield -from llama_stack_api.providers.datatypes import ShieldsProtocolPrivate +from llama_stack_api.datatypes import ShieldsProtocolPrivate +from llama_stack_api.inference import OpenAIMessageParam +from llama_stack_api.safety import ModerationObject, RunShieldResponse, Safety, SafetyViolation, ViolationLevel +from llama_stack_api.shields import Shield from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py index 287fed5fc..14ef39431 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -8,15 +8,15 @@ from typing import Any import litellm import requests -from llama_stack_api.apis.inference import OpenAIMessageParam -from llama_stack_api.apis.safety import ( +from llama_stack_api.datatypes import ShieldsProtocolPrivate +from llama_stack_api.inference import OpenAIMessageParam +from llama_stack_api.safety import ( RunShieldResponse, Safety, SafetyViolation, ViolationLevel, ) -from llama_stack_api.apis.shields import Shield -from llama_stack_api.providers.datatypes import ShieldsProtocolPrivate +from llama_stack_api.shields import Shield from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py index 688f7fc6e..11a917432 100644 --- a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py @@ -8,15 +8,15 @@ import json from typing import Any import httpx -from llama_stack_api.apis.common.content_types import URL -from llama_stack_api.apis.tools import ( +from llama_stack_api.common.content_types import URL +from llama_stack_api.datatypes import ToolGroupsProtocolPrivate +from llama_stack_api.tools import ( ListToolDefsResponse, ToolDef, ToolGroup,
ToolInvocationResult, ToolRuntime, ) -from llama_stack_api.providers.datatypes import ToolGroupsProtocolPrivate from llama_stack.core.request_headers import NeedsRequestProviderData diff --git a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py index d53d78fe4..9247e5543 100644 --- a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py @@ -7,15 +7,15 @@ from typing import Any import httpx -from llama_stack_api.apis.common.content_types import URL -from llama_stack_api.apis.tools import ( +from llama_stack_api.common.content_types import URL +from llama_stack_api.datatypes import ToolGroupsProtocolPrivate +from llama_stack_api.tools import ( ListToolDefsResponse, ToolDef, ToolGroup, ToolInvocationResult, ToolRuntime, ) -from llama_stack_api.providers.datatypes import ToolGroupsProtocolPrivate from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.models.llama.datatypes import BuiltinTool diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py index 394a67493..7beaebc5f 100644 --- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -7,15 +7,14 @@ from typing import Any from urllib.parse import urlparse -from llama_stack_api.apis.common.content_types import URL -from llama_stack_api.apis.datatypes import Api -from llama_stack_api.apis.tools import ( +from llama_stack_api.common.content_types import URL +from llama_stack_api.datatypes import Api, ToolGroupsProtocolPrivate +from llama_stack_api.tools import ( ListToolDefsResponse, ToolGroup, ToolInvocationResult, ToolRuntime, ) -from llama_stack_api.providers.datatypes import ToolGroupsProtocolPrivate from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index f61c104e0..9fe525ca5 100644 --- a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -8,15 +8,15 @@ import json from typing import Any import httpx -from llama_stack_api.apis.common.content_types import URL -from llama_stack_api.apis.tools import ( +from llama_stack_api.common.content_types import URL +from llama_stack_api.datatypes import ToolGroupsProtocolPrivate +from llama_stack_api.tools import ( ListToolDefsResponse, ToolDef, ToolGroup, ToolInvocationResult, ToolRuntime, ) -from llama_stack_api.providers.datatypes import ToolGroupsProtocolPrivate from llama_stack.core.request_headers import NeedsRequestProviderData diff --git a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py index d87d1dff9..ed4a9d4ba 100644 --- a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +++ b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py @@ -8,15 +8,15 @@ import json from typing import Any 
import httpx -from llama_stack_api.apis.common.content_types import URL -from llama_stack_api.apis.tools import ( +from llama_stack_api.common.content_types import URL +from llama_stack_api.datatypes import ToolGroupsProtocolPrivate +from llama_stack_api.tools import ( ListToolDefsResponse, ToolDef, ToolGroup, ToolInvocationResult, ToolRuntime, ) -from llama_stack_api.providers.datatypes import ToolGroupsProtocolPrivate from llama_stack.core.request_headers import NeedsRequestProviderData diff --git a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py index dd59ae7f4..685feb0b8 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api.providers.datatypes import Api, ProviderSpec +from llama_stack_api.datatypes import Api, ProviderSpec from .config import ChromaVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py index 6aefc9632..b7e6957cd 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -9,11 +9,11 @@ from typing import Any from urllib.parse import urlparse import chromadb -from llama_stack_api.apis.files import Files -from llama_stack_api.apis.inference import Inference, InterleavedContent -from llama_stack_api.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack_api.apis.vector_stores import VectorStore -from llama_stack_api.providers.datatypes import VectorStoresProtocolPrivate +from llama_stack_api.datatypes import VectorStoresProtocolPrivate +from llama_stack_api.files import Files +from llama_stack_api.inference import Inference, InterleavedContent +from llama_stack_api.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack_api.vector_stores import VectorStore from numpy.typing import NDArray from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py index 99f307807..1237b75d4 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack_api.providers.datatypes import Api, ProviderSpec +from llama_stack_api.datatypes import Api, ProviderSpec from .config import MilvusVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index 96d41d2b9..b7cc4066e 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -8,12 +8,12 @@ import asyncio import os from typing import Any -from llama_stack_api.apis.common.errors import VectorStoreNotFoundError -from llama_stack_api.apis.files import Files -from llama_stack_api.apis.inference import Inference, InterleavedContent -from llama_stack_api.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack_api.apis.vector_stores import VectorStore -from llama_stack_api.providers.datatypes import VectorStoresProtocolPrivate +from llama_stack_api.common.errors import VectorStoreNotFoundError +from llama_stack_api.datatypes import VectorStoresProtocolPrivate +from llama_stack_api.files import Files +from llama_stack_api.inference import Inference, InterleavedContent +from llama_stack_api.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack_api.vector_stores import VectorStore from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py index 8461021fd..e66644b2c 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack_api.providers.datatypes import Api, ProviderSpec +from llama_stack_api.datatypes import Api, ProviderSpec from .config import PGVectorVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 6baac6c7b..e9b5664ae 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -8,12 +8,12 @@ import heapq from typing import Any import psycopg2 -from llama_stack_api.apis.common.errors import VectorStoreNotFoundError -from llama_stack_api.apis.files import Files -from llama_stack_api.apis.inference import Inference, InterleavedContent -from llama_stack_api.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack_api.apis.vector_stores import VectorStore -from llama_stack_api.providers.datatypes import VectorStoresProtocolPrivate +from llama_stack_api.common.errors import VectorStoreNotFoundError +from llama_stack_api.datatypes import VectorStoresProtocolPrivate +from llama_stack_api.files import Files +from llama_stack_api.inference import Inference, InterleavedContent +from llama_stack_api.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack_api.vector_stores import VectorStore from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import Json, execute_values diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py index f87414e98..b56ff9811 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack_api.providers.datatypes import Api, ProviderSpec +from llama_stack_api.datatypes import Api, ProviderSpec from .config import QdrantVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index fbb61b7b3..86ddb351a 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -9,18 +9,18 @@ import hashlib import uuid from typing import Any -from llama_stack_api.apis.common.errors import VectorStoreNotFoundError -from llama_stack_api.apis.files import Files -from llama_stack_api.apis.inference import Inference, InterleavedContent -from llama_stack_api.apis.vector_io import ( +from llama_stack_api.common.errors import VectorStoreNotFoundError +from llama_stack_api.datatypes import VectorStoresProtocolPrivate +from llama_stack_api.files import Files +from llama_stack_api.inference import Inference, InterleavedContent +from llama_stack_api.vector_io import ( Chunk, QueryChunksResponse, VectorIO, VectorStoreChunkingStrategy, VectorStoreFileObject, ) -from llama_stack_api.apis.vector_stores import VectorStore -from llama_stack_api.providers.datatypes import VectorStoresProtocolPrivate +from llama_stack_api.vector_stores import VectorStore from numpy.typing import NDArray from qdrant_client import AsyncQdrantClient, models from qdrant_client.models import PointStruct diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py index fdc46bf77..7ce2607ea 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack_api.providers.datatypes import Api, ProviderSpec +from llama_stack_api.datatypes import Api, ProviderSpec from .config import WeaviateVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 69a8e93d5..715daa045 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -8,13 +8,13 @@ from typing import Any import weaviate import weaviate.classes as wvc -from llama_stack_api.apis.common.content_types import InterleavedContent -from llama_stack_api.apis.common.errors import VectorStoreNotFoundError -from llama_stack_api.apis.files import Files -from llama_stack_api.apis.inference import Inference -from llama_stack_api.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack_api.apis.vector_stores import VectorStore -from llama_stack_api.providers.datatypes import VectorStoresProtocolPrivate +from llama_stack_api.common.content_types import InterleavedContent +from llama_stack_api.common.errors import VectorStoreNotFoundError +from llama_stack_api.datatypes import VectorStoresProtocolPrivate +from llama_stack_api.files import Files +from llama_stack_api.inference import Inference +from llama_stack_api.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack_api.vector_stores import VectorStore from numpy.typing import NDArray from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion diff --git a/src/llama_stack/providers/utils/common/data_schema_validator.py b/src/llama_stack/providers/utils/common/data_schema_validator.py index df1d50153..d05c656a6 100644 --- a/src/llama_stack/providers/utils/common/data_schema_validator.py +++ b/src/llama_stack/providers/utils/common/data_schema_validator.py @@ -7,7 +7,7 @@ from enum import Enum from typing import Any -from llama_stack_api.apis.common.type_system import ( +from llama_stack_api.common.type_system import ( ChatCompletionInputType, CompletionInputType, StringType, diff --git a/src/llama_stack/providers/utils/files/form_data.py b/src/llama_stack/providers/utils/files/form_data.py index 5593ca381..6e3d3f4a7 100644 --- a/src/llama_stack/providers/utils/files/form_data.py +++ b/src/llama_stack/providers/utils/files/form_data.py @@ -7,7 +7,7 @@ import json from fastapi import Request -from llama_stack_api.apis.files import ExpiresAfter +from llama_stack_api.files import ExpiresAfter from pydantic import BaseModel, ValidationError diff --git a/src/llama_stack/providers/utils/inference/embedding_mixin.py b/src/llama_stack/providers/utils/inference/embedding_mixin.py index b86f4790e..aad158085 100644 --- a/src/llama_stack/providers/utils/inference/embedding_mixin.py +++ b/src/llama_stack/providers/utils/inference/embedding_mixin.py @@ -17,7 +17,7 @@ from llama_stack.log import get_logger if TYPE_CHECKING: from sentence_transformers import SentenceTransformer -from llama_stack_api.apis.inference import ( +from llama_stack_api.inference import ( ModelStore, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py index 439ddbf2d..79a23d249 100644 --- a/src/llama_stack/providers/utils/inference/inference_store.py +++ b/src/llama_stack/providers/utils/inference/inference_store.py @@ -6,7 +6,7 @@ import asyncio from typing import Any 
-from llama_stack_api.apis.inference import ( +from llama_stack_api.inference import ( ListOpenAIChatCompletionResponse, OpenAIChatCompletion, OpenAICompletionWithInputMessages, diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py index e18aabe34..cff3e2210 100644 --- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -9,7 +9,7 @@ import struct from collections.abc import AsyncIterator import litellm -from llama_stack_api.apis.inference import ( +from llama_stack_api.inference import ( InferenceProvider, OpenAIChatCompletion, OpenAIChatCompletionChunk, diff --git a/src/llama_stack/providers/utils/inference/model_registry.py b/src/llama_stack/providers/utils/inference/model_registry.py index c9848a62e..d8589ba4f 100644 --- a/src/llama_stack/providers/utils/inference/model_registry.py +++ b/src/llama_stack/providers/utils/inference/model_registry.py @@ -6,9 +6,9 @@ from typing import Any -from llama_stack_api.apis.common.errors import UnsupportedModelError -from llama_stack_api.apis.models import ModelType -from llama_stack_api.providers.datatypes import Model, ModelsProtocolPrivate +from llama_stack_api.common.errors import UnsupportedModelError +from llama_stack_api.datatypes import ModelsProtocolPrivate +from llama_stack_api.models import Model, ModelType from pydantic import BaseModel, Field, SecretStr from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py index 9feb28c0c..3f9e4aa38 100644 --- a/src/llama_stack/providers/utils/inference/openai_compat.py +++ b/src/llama_stack/providers/utils/inference/openai_compat.py @@ -20,13 +20,13 @@ except ImportError: from openai.types.chat.chat_completion_message_tool_call import ( ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall, ) -from llama_stack_api.apis.common.content_types import ( +from llama_stack_api.common.content_types import ( URL, ImageContentItem, TextContentItem, _URLOrData, ) -from llama_stack_api.apis.inference import ( +from llama_stack_api.inference import ( GreedySamplingStrategy, JsonSchemaResponseFormat, OpenAIResponseFormatParam, diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py index b54bf750a..0b41b092f 100644 --- a/src/llama_stack/providers/utils/inference/openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/openai_mixin.py @@ -10,7 +10,7 @@ from abc import ABC, abstractmethod from collections.abc import AsyncIterator, Iterable from typing import Any -from llama_stack_api.apis.inference import ( +from llama_stack_api.inference import ( Model, OpenAIChatCompletion, OpenAIChatCompletionChunk, @@ -23,7 +23,7 @@ from llama_stack_api.apis.inference import ( OpenAIEmbeddingUsage, OpenAIMessageParam, ) -from llama_stack_api.apis.models import ModelType +from llama_stack_api.models import ModelType from openai import AsyncOpenAI from pydantic import BaseModel, ConfigDict diff --git a/src/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py index 9c4756d71..2c59319bc 100644 --- a/src/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/src/llama_stack/providers/utils/inference/prompt_adapter.py @@ -12,13 +12,13 @@ import re from typing import 
Any import httpx -from llama_stack_api.apis.common.content_types import ( +from llama_stack_api.common.content_types import ( ImageContentItem, InterleavedContent, InterleavedContentItem, TextContentItem, ) -from llama_stack_api.apis.inference import ( +from llama_stack_api.inference import ( CompletionRequest, OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartImageParam, diff --git a/src/llama_stack/providers/utils/memory/file_utils.py b/src/llama_stack/providers/utils/memory/file_utils.py index 89c770c17..bd0ceb26c 100644 --- a/src/llama_stack/providers/utils/memory/file_utils.py +++ b/src/llama_stack/providers/utils/memory/file_utils.py @@ -8,7 +8,7 @@ import base64 import mimetypes import os -from llama_stack_api.apis.common.content_types import URL +from llama_stack_api.common.content_types import URL def data_url_from_file(file_path: str) -> URL: diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 44d306e85..a89d53d11 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -13,9 +13,9 @@ from abc import ABC, abstractmethod from typing import Annotated, Any from fastapi import Body -from llama_stack_api.apis.common.errors import VectorStoreNotFoundError -from llama_stack_api.apis.files import Files, OpenAIFileObject -from llama_stack_api.apis.vector_io import ( +from llama_stack_api.common.errors import VectorStoreNotFoundError +from llama_stack_api.files import Files, OpenAIFileObject +from llama_stack_api.vector_io import ( Chunk, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, @@ -41,7 +41,7 @@ from llama_stack_api.apis.vector_io import ( VectorStoreSearchResponse, VectorStoreSearchResponsePage, ) -from llama_stack_api.apis.vector_stores import VectorStore +from llama_stack_api.vector_stores import VectorStore from pydantic import TypeAdapter from llama_stack.core.id_generation import generate_object_id diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 3c04b28cf..775d47dbe 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -14,15 +14,15 @@ from urllib.parse import unquote import httpx import numpy as np -from llama_stack_api.apis.common.content_types import ( +from llama_stack_api.common.content_types import ( URL, InterleavedContent, ) -from llama_stack_api.apis.inference import OpenAIEmbeddingsRequestWithExtraBody -from llama_stack_api.apis.tools import RAGDocument -from llama_stack_api.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse -from llama_stack_api.apis.vector_stores import VectorStore -from llama_stack_api.providers.datatypes import Api +from llama_stack_api.datatypes import Api +from llama_stack_api.inference import OpenAIEmbeddingsRequestWithExtraBody +from llama_stack_api.rag_tool import RAGDocument +from llama_stack_api.vector_io import Chunk, ChunkMetadata, QueryChunksResponse +from llama_stack_api.vector_stores import VectorStore from numpy.typing import NDArray from pydantic import BaseModel diff --git a/src/llama_stack/providers/utils/pagination.py b/src/llama_stack/providers/utils/pagination.py index 737088c0a..edfd1e33b 100644 --- a/src/llama_stack/providers/utils/pagination.py +++ 
b/src/llama_stack/providers/utils/pagination.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.apis.common.responses import PaginatedResponse +from llama_stack_api.common.responses import PaginatedResponse def paginate_records( diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index e263122b8..095a1e43a 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -4,10 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api.apis.agents import ( +from llama_stack_api.agents import ( Order, ) -from llama_stack_api.apis.agents.openai_responses import ( +from llama_stack_api.inference import OpenAIMessageParam +from llama_stack_api.openai_responses import ( ListOpenAIResponseInputItem, ListOpenAIResponseObject, OpenAIDeleteResponseObject, @@ -15,7 +16,6 @@ from llama_stack_api.apis.agents.openai_responses import ( OpenAIResponseObject, OpenAIResponseObjectWithInput, ) -from llama_stack_api.apis.inference import OpenAIMessageParam from llama_stack.core.datatypes import AccessRule from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference diff --git a/src/llama_stack/providers/utils/scoring/aggregation_utils.py b/src/llama_stack/providers/utils/scoring/aggregation_utils.py index 909cdef00..ef59e0ed0 100644 --- a/src/llama_stack/providers/utils/scoring/aggregation_utils.py +++ b/src/llama_stack/providers/utils/scoring/aggregation_utils.py @@ -6,8 +6,8 @@ import statistics from typing import Any -from llama_stack_api.apis.scoring import ScoringResultRow -from llama_stack_api.apis.scoring_functions import AggregationFunctionType +from llama_stack_api.scoring import ScoringResultRow +from llama_stack_api.scoring_functions import AggregationFunctionType def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]: diff --git a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py index c97c40b2c..aee998c01 100644 --- a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py +++ b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py @@ -6,8 +6,8 @@ from abc import ABC, abstractmethod from typing import Any -from llama_stack_api.apis.scoring import ScoringFnParams, ScoringResultRow -from llama_stack_api.apis.scoring_functions import ScoringFn +from llama_stack_api.scoring import ScoringFnParams, ScoringResultRow +from llama_stack_api.scoring_functions import ScoringFn from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics diff --git a/src/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py index 58fc7ab9f..e21aba382 100644 --- a/src/llama_stack/providers/utils/sqlstore/api.py +++ b/src/llama_stack/providers/utils/sqlstore/api.py @@ -8,7 +8,7 @@ from collections.abc import Mapping, Sequence from enum import Enum from typing import Any, Literal, Protocol -from llama_stack_api.apis.common.responses import PaginatedResponse +from llama_stack_api.common.responses import PaginatedResponse from pydantic import BaseModel diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index bdda5cd50..74c164c73 100644 --- 
a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -6,7 +6,7 @@ from collections.abc import Mapping, Sequence from typing import Any, Literal, cast -from llama_stack_api.apis.common.responses import PaginatedResponse +from llama_stack_api.common.responses import PaginatedResponse from sqlalchemy import ( JSON, Boolean, diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py index c7f6d5f46..8b2560cb5 100644 --- a/src/llama_stack/providers/utils/tools/mcp.py +++ b/src/llama_stack/providers/utils/tools/mcp.py @@ -10,13 +10,13 @@ from enum import Enum from typing import Any, cast import httpx -from llama_stack_api.apis.common.content_types import ( +from llama_stack_api.common.content_types import ( ImageContentItem, InterleavedContentItem, TextContentItem, _URLOrData, ) -from llama_stack_api.apis.tools import ( +from llama_stack_api.tools import ( ListToolDefsResponse, ToolDef, ToolInvocationResult, diff --git a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py index f79fc7e83..7318a697f 100644 --- a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py +++ b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py @@ -6,9 +6,9 @@ from typing import Protocol -from llama_stack_api.apis.version import LLAMA_STACK_API_V1 -from llama_stack_api.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec +from llama_stack_api.datatypes import Api, ProviderSpec, RemoteProviderSpec from llama_stack_api.schema_utils import webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 def available_providers() -> list[ProviderSpec]: diff --git a/tests/integration/batches/conftest.py b/tests/integration/batches/conftest.py index 97711afce..770578311 100644 --- a/tests/integration/batches/conftest.py +++ b/tests/integration/batches/conftest.py @@ -13,7 +13,7 @@ from contextlib import contextmanager from io import BytesIO import pytest -from llama_stack_api.apis.files import OpenAIFilePurpose +from llama_stack_api.files import OpenAIFilePurpose class BatchHelper: diff --git a/tests/integration/files/test_files.py b/tests/integration/files/test_files.py index de7278857..f9cbf7987 100644 --- a/tests/integration/files/test_files.py +++ b/tests/integration/files/test_files.py @@ -9,7 +9,7 @@ from unittest.mock import patch import pytest import requests -from llama_stack_api.apis.files import OpenAIFilePurpose +from llama_stack_api.files import OpenAIFilePurpose from llama_stack.core.datatypes import User diff --git a/tests/integration/inference/test_provider_data_routing.py b/tests/integration/inference/test_provider_data_routing.py index cd4dad1a2..0b4186c3c 100644 --- a/tests/integration/inference/test_provider_data_routing.py +++ b/tests/integration/inference/test_provider_data_routing.py @@ -15,8 +15,8 @@ that enables routing based on provider_data alone. 
from unittest.mock import AsyncMock, patch import pytest -from llama_stack_api.apis.datatypes import Api -from llama_stack_api.apis.inference.inference import ( +from llama_stack_api.datatypes import Api +from llama_stack_api.inference import ( OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChatCompletionUsage, diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py index 2541acbba..8728d4aa4 100644 --- a/tests/integration/post_training/test_post_training.py +++ b/tests/integration/post_training/test_post_training.py @@ -9,7 +9,7 @@ import time import uuid import pytest -from llama_stack_api.apis.post_training import ( +from llama_stack_api.post_training import ( DataConfig, DatasetFormat, DPOAlignmentConfig, diff --git a/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json b/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json index 2a5d6c75f..4e80e1cdd 100644 --- a/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json +++ b/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json @@ -10,7 +10,7 @@ }, "response": { "body": { - "__type__": "llama_stack_api.apis.tools.tools.ToolInvocationResult", + "__type__": "llama_stack_api.tools.ToolInvocationResult", "__data__": { "content": "{\"query\": \"Llama 4 Maverick model experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. 
* For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9170729, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E - Hugging Face\", \"content\": \"Model Architecture: The Llama 4 models are auto-regressive language models that use a mixture-of-experts (MoE) architecture and incorporate\", \"score\": 0.8021998, \"raw_content\": null}, {\"url\": \"https://www.ibm.com/new/announcements/meta-llama-4-maverick-and-llama-4-scout-now-available-in-watsonx-ai\", \"title\": \"Meta Llama 4 Maverick and Llama 4 Scout now available in watsonx ...\", \"content\": \"# Meta Llama 4 Maverick and Llama 4 Scout now available in watsonx.ai **IBM is excited to announce the addition of Meta\\u2019s latest generation of open models, Llama 4, to** **watsonx.ai****.** Llama 4 Scout and Llama 4 Maverick, the first mixture of experts (MoE) models released by Meta, provide frontier multimodal performance, high speeds, low cost, and industry leading context length. With the introduction of these latest offerings from Meta, IBM now supports a total of 13 Meta models in the expansive library of \\u00a0foundation models available in watsonx.ai. Trained on 40 trillion tokens of data, Llama 4 Scout offers performance rivalling or exceeding that of models with significantly larger active parameter counts while keeping costs and latency low. ## Llama 4 models on IBM watsonx\", \"score\": 0.78194773, \"raw_content\": null}, {\"url\": \"https://medium.com/@divyanshbhatiajm19/metas-llama-4-family-the-complete-guide-to-scout-maverick-and-behemoth-ai-models-in-2025-21a90c882e8a\", \"title\": \"Meta's Llama 4 Family: The Complete Guide to Scout, Maverick, and ...\", \"content\": \"# Meta\\u2019s Llama 4 Family: The Complete Guide to Scout, Maverick, and Behemoth AI Models in 2025 Feature Llama 4 Scout Llama 4 Maverick Llama 4 Behemoth **Total Parameters** 109B 400B ~2T **Active Parameters** 17B 17B 288B **Expert Count** 16 128 16 **Context Window** 10M tokens 1M tokens Not specified **Hardware Requirements** Single H100 GPU Single H100 DGX host Multiple GPUs **Inference Cost** Not specified $0.19-$0.49 per 1M tokens Not specified **Release Status** Available now Available now In training **Primary Use Cases** Long-context analysis, code processing High-performance multimodal applications Research, STEM reasoning The Llama 4 family represents Meta\\u2019s most significant AI development to date, with each model offering distinct advantages for different use cases:\", \"score\": 0.69672287, \"raw_content\": null}, {\"url\": \"https://www.llama.com/models/llama-4/\", \"title\": \"Unmatched Performance and Efficiency | Llama 4\", \"content\": \"# Llama 4 # Llama 4 Llama 4 Scout Class-leading natively multimodal model that offers superior text and visual intelligence, single H100 GPU efficiency, and a 10M context window for seamless long document analysis. Llama 4 MaverickIndustry-leading natively multimodal model for image and text understanding with groundbreaking intelligence and fast responses at a low cost. 
We evaluated model performance on a suite of common benchmarks across a wide range of languages, testing for coding, reasoning, knowledge, vision understanding, multilinguality, and long context. 4. Specialized long context evals are not traditionally reported for generalist models, so we share internal runs to showcase llama's frontier performance. 4. Specialized long context evals are not traditionally reported for generalist models, so we share internal runs to showcase llama's frontier performance.\", \"score\": 0.629889, \"raw_content\": null}]}", "error_message": null, diff --git a/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json b/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json index af3927a10..a8e1e8611 100644 --- a/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json +++ b/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json @@ -10,7 +10,7 @@ }, "response": { "body": { - "__type__": "llama_stack_api.apis.tools.tools.ToolInvocationResult", + "__type__": "llama_stack_api.tools.ToolInvocationResult", "__data__": { "content": "{\"query\": \"Llama 4 Maverick model number of experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. * For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9287263, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Model developer: Meta. Model Architecture: The\", \"score\": 0.9183121, \"raw_content\": null}, {\"url\": \"https://build.nvidia.com/meta/llama-4-maverick-17b-128e-instruct/modelcard\", \"title\": \"llama-4-maverick-17b-128e-instruct Model by Meta\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Third-Party Community Consideration. This model\", \"score\": 0.91399205, \"raw_content\": null}, {\"url\": \"https://replicate.com/meta/llama-4-maverick-instruct\", \"title\": \"meta/llama-4-maverick-instruct | Run with an API on ...\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. 
All services are online \\u00b7 Home \\u00b7 About \\u00b7 Changelog\", \"score\": 0.9073207, \"raw_content\": null}, {\"url\": \"https://openrouter.ai/meta-llama/llama-4-maverick\", \"title\": \"Llama 4 Maverick - API, Providers, Stats\", \"content\": \"# Meta: Llama 4 Maverick ### meta-llama/llama-4-maverick Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput. Llama 4 Maverick - API, Providers, Stats | OpenRouter ## Providers for Llama 4 Maverick ## Performance for Llama 4 Maverick ## Apps using Llama 4 Maverick ## Recent activity on Llama 4 Maverick ## Uptime stats for Llama 4 Maverick ## Sample code and API for Llama 4 Maverick\", \"score\": 0.8958969, \"raw_content\": null}]}", "error_message": null, diff --git a/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json b/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json index 45b2b6450..dd7884012 100644 --- a/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json +++ b/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json @@ -10,7 +10,7 @@ }, "response": { "body": { - "__type__": "llama_stack_api.apis.tools.tools.ToolInvocationResult", + "__type__": "llama_stack_api.tools.ToolInvocationResult", "__data__": { "content": "{\"query\": \"latest version of Python\", \"top_k\": [{\"url\": \"https://www.liquidweb.com/blog/latest-python-version/\", \"title\": \"The latest Python version: Python 3.14 - Liquid Web\", \"content\": \"The latest major version, Python 3.14 was officially released on October 7, 2025. Let's explore the key features of Python's current version, how to download\", \"score\": 0.890761, \"raw_content\": null}, {\"url\": \"https://docs.python.org/3/whatsnew/3.14.html\", \"title\": \"What's new in Python 3.14 \\u2014 Python 3.14.0 documentation\", \"content\": \"Python 3.14 is the latest stable release of the Python programming language, with a mix of changes to the language, the implementation, and the standard\", \"score\": 0.8124067, \"raw_content\": null}, {\"url\": \"https://devguide.python.org/versions/\", \"title\": \"Status of Python versions - Python Developer's Guide\", \"content\": \"The main branch is currently the future Python 3.15, and is the only branch that accepts new features. The latest release for each Python version can be found\", \"score\": 0.80089486, \"raw_content\": null}, {\"url\": \"https://www.python.org/doc/versions/\", \"title\": \"Python documentation by version\", \"content\": \"Python 3.12.4, documentation released on 6 June 2024. Python 3.12.3, documentation released on 9 April 2024. 
Python 3.12.2, documentation released on 6 February\", \"score\": 0.74563974, \"raw_content\": null}, {\"url\": \"https://www.python.org/downloads/\", \"title\": \"Download Python | Python.org\", \"content\": \"Active Python Releases \\u00b7 3.15 pre-release 2026-10-07 (planned) 2031-10 PEP 790 \\u00b7 3.14 bugfix 2025-10-07 2030-10 PEP 745 \\u00b7 3.13 bugfix 2024-10-07 2029-10 PEP 719\", \"score\": 0.6551821, \"raw_content\": null}]}", "error_message": null, diff --git a/tests/integration/safety/test_llama_guard.py b/tests/integration/safety/test_llama_guard.py index 2dfcdb27f..b88270a9f 100644 --- a/tests/integration/safety/test_llama_guard.py +++ b/tests/integration/safety/test_llama_guard.py @@ -12,7 +12,7 @@ import warnings from collections.abc import Generator import pytest -from llama_stack_api.apis.safety import ViolationLevel +from llama_stack_api.safety import ViolationLevel from llama_stack.models.llama.sku_types import CoreModelId diff --git a/tests/integration/safety/test_safety.py b/tests/integration/safety/test_safety.py index e236bdf1e..0cc15e6dd 100644 --- a/tests/integration/safety/test_safety.py +++ b/tests/integration/safety/test_safety.py @@ -7,7 +7,7 @@ import base64 import mimetypes import pytest -from llama_stack_api.apis.safety import ViolationLevel +from llama_stack_api.safety import ViolationLevel CODE_SCANNER_ENABLED_PROVIDERS = {"ollama", "together", "fireworks"} diff --git a/tests/integration/safety/test_vision_safety.py b/tests/integration/safety/test_vision_safety.py index 2bacb51aa..ca765cafa 100644 --- a/tests/integration/safety/test_vision_safety.py +++ b/tests/integration/safety/test_vision_safety.py @@ -9,7 +9,7 @@ import mimetypes import os import pytest -from llama_stack_api.apis.safety import ViolationLevel +from llama_stack_api.safety import ViolationLevel VISION_SHIELD_ENABLED_PROVIDERS = {"together"} diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py index ddc2ed57e..8d514b192 100644 --- a/tests/integration/tool_runtime/test_registration.py +++ b/tests/integration/tool_runtime/test_registration.py @@ -7,7 +7,7 @@ import re import pytest -from llama_stack_api.apis.common.errors import ToolGroupNotFoundError +from llama_stack_api.common.errors import ToolGroupNotFoundError from llama_stack.core.library_client import LlamaStackAsLibraryClient from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 9a6bf3180..6d7069bf8 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -8,8 +8,8 @@ import time from io import BytesIO import pytest -from llama_stack_api.apis.files import ExpiresAfter -from llama_stack_api.apis.vector_io import Chunk +from llama_stack_api.files import ExpiresAfter +from llama_stack_api.vector_io import Chunk from llama_stack_client import BadRequestError from openai import BadRequestError as OpenAIBadRequestError @@ -646,7 +646,7 @@ def test_openai_vector_store_attach_file( ): """Test OpenAI vector store attach file.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) - from llama_stack_api.apis.files import ExpiresAfter + from llama_stack_api.files import ExpiresAfter compat_client = compat_client_with_empty_stores @@ -710,7 +710,7 @@ def test_openai_vector_store_attach_files_on_creation( 
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack_api.apis.files import ExpiresAfter + from llama_stack_api.files import ExpiresAfter # Create some files and attach them to the vector store valid_file_ids = [] @@ -775,7 +775,7 @@ def test_openai_vector_store_list_files( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack_api.apis.files import ExpiresAfter + from llama_stack_api.files import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -867,7 +867,7 @@ def test_openai_vector_store_retrieve_file_contents( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack_api.apis.files import ExpiresAfter + from llama_stack_api.files import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -928,7 +928,7 @@ def test_openai_vector_store_delete_file( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack_api.apis.files import ExpiresAfter + from llama_stack_api.files import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -994,7 +994,7 @@ def test_openai_vector_store_delete_file_removes_from_vector_store( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack_api.apis.files import ExpiresAfter + from llama_stack_api.files import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -1046,7 +1046,7 @@ def test_openai_vector_store_update_file( skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) compat_client = compat_client_with_empty_stores - from llama_stack_api.apis.files import ExpiresAfter + from llama_stack_api.files import ExpiresAfter # Create a vector store vector_store = compat_client.vector_stores.create( @@ -1103,7 +1103,7 @@ def test_create_vector_store_files_duplicate_vector_store_name( This test confirms that client.vector_stores.create() creates a unique ID """ skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) - from llama_stack_api.apis.files import ExpiresAfter + from llama_stack_api.files import ExpiresAfter compat_client = compat_client_with_empty_stores diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py index f156b429e..f1cc5e8de 100644 --- a/tests/integration/vector_io/test_vector_io.py +++ b/tests/integration/vector_io/test_vector_io.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import pytest -from llama_stack_api.apis.vector_io import Chunk +from llama_stack_api.vector_io import Chunk from ..conftest import vector_provider_wrapper diff --git a/tests/unit/conversations/test_api_models.py b/tests/unit/conversations/test_api_models.py index 477510324..361cbc105 100644 --- a/tests/unit/conversations/test_api_models.py +++ b/tests/unit/conversations/test_api_models.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
-from llama_stack_api.apis.conversations.conversations import (
+from llama_stack_api.conversations import (
     Conversation,
     ConversationItem,
     ConversationItemList,
diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py
index fd7044011..ea48aca7a 100644
--- a/tests/unit/conversations/test_conversations.py
+++ b/tests/unit/conversations/test_conversations.py
@@ -8,7 +8,7 @@
 import tempfile
 from pathlib import Path
 
 import pytest
-from llama_stack_api.apis.agents.openai_responses import (
+from llama_stack_api.openai_responses import (
     OpenAIResponseInputMessageContentText,
     OpenAIResponseMessage,
 )
diff --git a/tests/unit/core/routers/test_safety_router.py b/tests/unit/core/routers/test_safety_router.py
index 411b89fbc..9b2c5e67a 100644
--- a/tests/unit/core/routers/test_safety_router.py
+++ b/tests/unit/core/routers/test_safety_router.py
@@ -6,8 +6,8 @@
 from unittest.mock import AsyncMock
 
-from llama_stack_api.apis.safety.safety import ModerationObject, ModerationObjectResults
-from llama_stack_api.apis.shields import ListShieldsResponse, Shield
+from llama_stack_api.safety import ModerationObject, ModerationObjectResults
+from llama_stack_api.shields import ListShieldsResponse, Shield
 
 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.core.routers.safety import SafetyRouter
diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py
index 00113ac86..806f1bbce 100644
--- a/tests/unit/core/routers/test_vector_io.py
+++ b/tests/unit/core/routers/test_vector_io.py
@@ -7,7 +7,7 @@
 from unittest.mock import AsyncMock, Mock
 
 import pytest
-from llama_stack_api.apis.vector_io import OpenAICreateVectorStoreRequestWithExtraBody
+from llama_stack_api.vector_io import OpenAICreateVectorStoreRequestWithExtraBody
 
 from llama_stack.core.routers.vector_io import VectorIORouter
diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py
index 5b03ab1f4..95779f0b8 100644
--- a/tests/unit/core/test_stack_validation.py
+++ b/tests/unit/core/test_stack_validation.py
@@ -9,9 +9,9 @@
 from unittest.mock import AsyncMock
 
 import pytest
-from llama_stack_api.apis.models import ListModelsResponse, Model, ModelType
-from llama_stack_api.apis.shields import ListShieldsResponse, Shield
-from llama_stack_api.providers.datatypes import Api
+from llama_stack_api.datatypes import Api
+from llama_stack_api.models import ListModelsResponse, Model, ModelType
+from llama_stack_api.shields import ListShieldsResponse, Shield
 
 from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, StorageConfig, VectorStoresConfig
 from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config
diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py
index 70702b89d..a39690653 100644
--- a/tests/unit/distribution/routers/test_routing_tables.py
+++ b/tests/unit/distribution/routers/test_routing_tables.py
@@ -9,14 +9,14 @@
 from unittest.mock import AsyncMock
 
 import pytest
-from llama_stack_api.apis.common.content_types import URL
-from llama_stack_api.apis.common.errors import ModelNotFoundError
-from llama_stack_api.apis.common.type_system import NumberType
-from llama_stack_api.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource
-from llama_stack_api.apis.datatypes import Api
-from llama_stack_api.apis.models import Model, ModelType
-from llama_stack_api.apis.shields.shields import Shield
-from llama_stack_api.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup
+from llama_stack_api.common.content_types import URL
+from llama_stack_api.common.errors import ModelNotFoundError
+from llama_stack_api.common.type_system import NumberType
+from llama_stack_api.datasets import Dataset, DatasetPurpose, URIDataSource
+from llama_stack_api.datatypes import Api
+from llama_stack_api.models import Model, ModelType
+from llama_stack_api.shields import Shield
+from llama_stack_api.tools import ListToolDefsResponse, ToolDef, ToolGroup
 
 from llama_stack.core.datatypes import RegistryEntrySource
 from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable
diff --git a/tests/unit/distribution/test_api_recordings.py b/tests/unit/distribution/test_api_recordings.py
index 07a6bcebf..aaa63c743 100644
--- a/tests/unit/distribution/test_api_recordings.py
+++ b/tests/unit/distribution/test_api_recordings.py
@@ -11,7 +11,7 @@
 from unittest.mock import patch
 
 import pytest
 
 # Import the real Pydantic response types instead of using Mocks
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChoice,
diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py
index d8a2c6a5f..855552b9d 100644
--- a/tests/unit/distribution/test_distribution.py
+++ b/tests/unit/distribution/test_distribution.py
@@ -9,7 +9,7 @@
 from unittest.mock import patch
 
 import pytest
 import yaml
-from llama_stack_api.providers.datatypes import ProviderSpec
+from llama_stack_api.datatypes import ProviderSpec
 from pydantic import BaseModel, Field, ValidationError
 
 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
@@ -312,7 +312,7 @@ pip_packages:
         """Test loading an external provider from a module (success path)."""
         from types import SimpleNamespace
 
-        from llama_stack_api.providers.datatypes import Api, ProviderSpec
+        from llama_stack_api.datatypes import Api, ProviderSpec
 
         # Simulate a provider module with get_provider_spec
         fake_spec = ProviderSpec(
@@ -395,7 +395,7 @@ pip_packages:
 
     def test_external_provider_from_module_building(self, mock_providers):
         """Test loading an external provider from a module during build (building=True, partial spec)."""
-        from llama_stack_api.providers.datatypes import Api
+        from llama_stack_api.datatypes import Api
 
         from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
@@ -457,7 +457,7 @@ class TestGetExternalProvidersFromModule:
         """Test provider with module containing version spec (e.g., package==1.0.0)."""
         from types import SimpleNamespace
 
-        from llama_stack_api.providers.datatypes import ProviderSpec
+        from llama_stack_api.datatypes import ProviderSpec
 
         from llama_stack.core.distribution import get_external_providers_from_module
@@ -595,7 +595,7 @@ class TestGetExternalProvidersFromModule:
         """Test when get_provider_spec returns a list of specs."""
         from types import SimpleNamespace
 
-        from llama_stack_api.providers.datatypes import ProviderSpec
+        from llama_stack_api.datatypes import ProviderSpec
 
         from llama_stack.core.distribution import get_external_providers_from_module
@@ -644,7 +644,7 @@ class TestGetExternalProvidersFromModule:
         """Test that list return filters specs by provider_type."""
         from types import SimpleNamespace
 
-        from llama_stack_api.providers.datatypes import ProviderSpec
+        from llama_stack_api.datatypes import ProviderSpec
 
         from llama_stack.core.distribution import get_external_providers_from_module
@@ -693,7 +693,7 @@ class TestGetExternalProvidersFromModule:
         """Test that list return adds multiple different provider_types when config requests them."""
         from types import SimpleNamespace
 
-        from llama_stack_api.providers.datatypes import ProviderSpec
+        from llama_stack_api.datatypes import ProviderSpec
 
         from llama_stack.core.distribution import get_external_providers_from_module
@@ -833,7 +833,7 @@ class TestGetExternalProvidersFromModule:
         """Test multiple APIs with providers."""
         from types import SimpleNamespace
 
-        from llama_stack_api.providers.datatypes import ProviderSpec
+        from llama_stack_api.datatypes import ProviderSpec
 
         from llama_stack.core.distribution import get_external_providers_from_module
diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py
index e2c77f57b..eae4f5741 100644
--- a/tests/unit/files/test_files.py
+++ b/tests/unit/files/test_files.py
@@ -6,9 +6,9 @@
 
 import pytest
-from llama_stack_api.apis.common.errors import ResourceNotFoundError
-from llama_stack_api.apis.common.responses import Order
-from llama_stack_api.apis.files import OpenAIFilePurpose
+from llama_stack_api.common.errors import ResourceNotFoundError
+from llama_stack_api.common.responses import Order
+from llama_stack_api.files import OpenAIFilePurpose
 
 from llama_stack.core.access_control.access_control import default_policy
 from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference
diff --git a/tests/unit/providers/batches/test_reference.py b/tests/unit/providers/batches/test_reference.py
index 0a01fe01d..1d423dc33 100644
--- a/tests/unit/providers/batches/test_reference.py
+++ b/tests/unit/providers/batches/test_reference.py
@@ -58,8 +58,8 @@
 import json
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
-from llama_stack_api.apis.batches import BatchObject
-from llama_stack_api.apis.common.errors import ConflictError, ResourceNotFoundError
+from llama_stack_api.batches import BatchObject
+from llama_stack_api.common.errors import ConflictError, ResourceNotFoundError
 
 class TestReferenceBatchesImpl:
diff --git a/tests/unit/providers/batches/test_reference_idempotency.py b/tests/unit/providers/batches/test_reference_idempotency.py
index 700f66a84..01307e0ff 100644
--- a/tests/unit/providers/batches/test_reference_idempotency.py
+++ b/tests/unit/providers/batches/test_reference_idempotency.py
@@ -43,7 +43,7 @@ Key Behaviors Tested:
 import asyncio
 
 import pytest
-from llama_stack_api.apis.common.errors import ConflictError
+from llama_stack_api.common.errors import ConflictError
 
 
 class TestReferenceBatchesIdempotency:
diff --git a/tests/unit/providers/files/test_s3_files.py b/tests/unit/providers/files/test_s3_files.py
index 16b3f10b7..fb4714914 100644
--- a/tests/unit/providers/files/test_s3_files.py
+++ b/tests/unit/providers/files/test_s3_files.py
@@ -8,8 +8,8 @@
 from unittest.mock import patch
 
 import pytest
 from botocore.exceptions import ClientError
-from llama_stack_api.apis.common.errors import ResourceNotFoundError
-from llama_stack_api.apis.files import OpenAIFilePurpose
+from llama_stack_api.common.errors import ResourceNotFoundError
+from llama_stack_api.files import OpenAIFilePurpose
 
 class TestS3FilesImpl:
@@ -227,7 +227,7 @@
             mock_now.return_value = 0
 
-            from llama_stack_api.apis.files import ExpiresAfter
+            from llama_stack_api.files import ExpiresAfter
 
             sample_text_file.filename = "test_expired_file"
             uploaded = await s3_provider.openai_upload_file(
@@ -259,7 +259,7 @@ class TestS3FilesImpl:
     async def test_unsupported_expires_after_anchor(self, s3_provider, sample_text_file):
         """Unsupported anchor value should raise ValueError."""
-        from llama_stack_api.apis.files import ExpiresAfter
+        from llama_stack_api.files import ExpiresAfter
 
         sample_text_file.filename = "test_unsupported_expires_after_anchor"
@@ -272,7 +272,7 @@ class TestS3FilesImpl:
    async def test_nonint_expires_after_seconds(self, s3_provider, sample_text_file):
         """Non-integer seconds in expires_after should raise ValueError."""
-        from llama_stack_api.apis.files import ExpiresAfter
+        from llama_stack_api.files import ExpiresAfter
 
         sample_text_file.filename = "test_nonint_expires_after_seconds"
@@ -285,7 +285,7 @@ class TestS3FilesImpl:
    async def test_expires_after_seconds_out_of_bounds(self, s3_provider, sample_text_file):
         """Seconds outside allowed range should raise ValueError."""
-        from llama_stack_api.apis.files import ExpiresAfter
+        from llama_stack_api.files import ExpiresAfter
 
         with pytest.raises(ValueError, match="greater than or equal to 3600"):
             await s3_provider.openai_upload_file(
diff --git a/tests/unit/providers/files/test_s3_files_auth.py b/tests/unit/providers/files/test_s3_files_auth.py
index db7161ba1..4c72e6f7d 100644
--- a/tests/unit/providers/files/test_s3_files_auth.py
+++ b/tests/unit/providers/files/test_s3_files_auth.py
@@ -7,8 +7,8 @@
 from unittest.mock import patch
 
 import pytest
-from llama_stack_api.apis.common.errors import ResourceNotFoundError
-from llama_stack_api.apis.files import OpenAIFilePurpose
+from llama_stack_api.common.errors import ResourceNotFoundError
+from llama_stack_api.files import OpenAIFilePurpose
 
 from llama_stack.core.datatypes import User
 from llama_stack.providers.remote.files.s3.files import S3FilesImpl
diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py
index 04e8f69ab..cc5fc6ce0 100644
--- a/tests/unit/providers/inference/test_bedrock_adapter.py
+++ b/tests/unit/providers/inference/test_bedrock_adapter.py
@@ -8,7 +8,7 @@
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
-from llama_stack_api.apis.inference import OpenAIChatCompletionRequestWithExtraBody
+from llama_stack_api.inference import OpenAIChatCompletionRequestWithExtraBody
 from openai import AuthenticationError
 
 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index 2b4840884..505e0ac1b 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -9,7 +9,8 @@
 import time
 from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch
 
 import pytest
-from llama_stack_api.apis.inference import (
+from llama_stack_api.datatypes import HealthStatus
+from llama_stack_api.inference import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -19,8 +20,7 @@ from llama_stack_api.apis.inference import (
     OpenAICompletionRequestWithExtraBody,
     ToolChoice,
 )
-from llama_stack_api.apis.models import Model
-from llama_stack_api.providers.datatypes import HealthStatus
+from llama_stack_api.models import Model
 
 from llama_stack.core.routers.inference import InferenceRouter
 from llama_stack.core.routing_tables.models import ModelsRoutingTable
diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
index 4c01f4d93..d11766c0b 100644
--- a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
+++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
@@ -7,7 +7,7 @@
 from unittest.mock import AsyncMock
 
 import pytest
-from llama_stack_api.apis.tools import ToolDef
+from llama_stack_api.tools import ToolDef
 
 from llama_stack.providers.inline.agents.meta_reference.responses.streaming import (
     convert_tooldef_to_chat_tool,
diff --git a/tests/unit/providers/nvidia/test_datastore.py b/tests/unit/providers/nvidia/test_datastore.py
index e643baede..0300511cb 100644
--- a/tests/unit/providers/nvidia/test_datastore.py
+++ b/tests/unit/providers/nvidia/test_datastore.py
@@ -8,8 +8,8 @@
 import os
 from unittest.mock import patch
 
 import pytest
-from llama_stack_api.apis.datasets import Dataset, DatasetPurpose, URIDataSource
-from llama_stack_api.apis.resource import ResourceType
+from llama_stack_api.datasets import Dataset, DatasetPurpose, URIDataSource
+from llama_stack_api.resource import ResourceType
 
 from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig
 from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter
diff --git a/tests/unit/providers/nvidia/test_eval.py b/tests/unit/providers/nvidia/test_eval.py
index 57efc2322..5c5314068 100644
--- a/tests/unit/providers/nvidia/test_eval.py
+++ b/tests/unit/providers/nvidia/test_eval.py
@@ -8,11 +8,11 @@
 import os
 from unittest.mock import MagicMock, patch
 
 import pytest
-from llama_stack_api.apis.benchmarks import Benchmark
-from llama_stack_api.apis.common.job_types import Job, JobStatus
-from llama_stack_api.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
-from llama_stack_api.apis.inference.inference import TopPSamplingStrategy
-from llama_stack_api.apis.resource import ResourceType
+from llama_stack_api.benchmarks import Benchmark
+from llama_stack_api.common.job_types import Job, JobStatus
+from llama_stack_api.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
+from llama_stack_api.inference import TopPSamplingStrategy
+from llama_stack_api.resource import ResourceType
 
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
diff --git a/tests/unit/providers/nvidia/test_parameters.py b/tests/unit/providers/nvidia/test_parameters.py
index 9143dda0e..9e939f327 100644
--- a/tests/unit/providers/nvidia/test_parameters.py
+++ b/tests/unit/providers/nvidia/test_parameters.py
@@ -9,7 +9,7 @@
 import warnings
 from unittest.mock import patch
 
 import pytest
-from llama_stack_api.apis.post_training.post_training import (
+from llama_stack_api.post_training import (
     DataConfig,
     DatasetFormat,
     EfficiencyConfig,
diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py
index dc5dcd28a..a87dd74ee 100644
--- a/tests/unit/providers/nvidia/test_rerank_inference.py
+++ b/tests/unit/providers/nvidia/test_rerank_inference.py
@@ -8,7 +8,7 @@
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import aiohttp
 import pytest
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.models import ModelType
 
 from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig
 from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter
diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py
index 03cb21616..aaa441e23 100644
--- a/tests/unit/providers/nvidia/test_safety.py
+++ b/tests/unit/providers/nvidia/test_safety.py
@@ -9,13 +9,13 @@
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
     OpenAIAssistantMessageParam,
     OpenAIUserMessageParam,
 )
-from llama_stack_api.apis.resource import ResourceType
-from llama_stack_api.apis.safety import RunShieldResponse, ViolationLevel
-from llama_stack_api.apis.shields import Shield
+from llama_stack_api.resource import ResourceType
+from llama_stack_api.safety import RunShieldResponse, ViolationLevel
+from llama_stack_api.shields import Shield
 
 from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig
 from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter
diff --git a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
index 24ceba4a5..549fb3176 100644
--- a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
+++ b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
@@ -9,7 +9,7 @@
 import warnings
 from unittest.mock import patch
 
 import pytest
-from llama_stack_api.apis.post_training.post_training import (
+from llama_stack_api.post_training import (
     DataConfig,
     DatasetFormat,
     LoraFinetuningConfig,
diff --git a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py
index 6df013214..877f2f286 100644
--- a/tests/unit/providers/test_bedrock.py
+++ b/tests/unit/providers/test_bedrock.py
@@ -7,7 +7,7 @@
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, PropertyMock, patch
 
-from llama_stack_api.apis.inference import OpenAIChatCompletionRequestWithExtraBody
+from llama_stack_api.inference import OpenAIChatCompletionRequestWithExtraBody
 
 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py
index 928cc43af..3af64a4b3 100644
--- a/tests/unit/providers/utils/inference/test_openai_mixin.py
+++ b/tests/unit/providers/utils/inference/test_openai_mixin.py
@@ -10,8 +10,8 @@
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch
 
 import pytest
-from llama_stack_api.apis.inference import Model, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.inference import Model, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
+from llama_stack_api.models import ModelType
 from pydantic import BaseModel, Field
 
 from llama_stack.core.request_headers import request_provider_data_context
diff --git a/tests/unit/providers/utils/inference/test_prompt_adapter.py b/tests/unit/providers/utils/inference/test_prompt_adapter.py
index c6ddeb66d..bdc609503 100644
--- a/tests/unit/providers/utils/inference/test_prompt_adapter.py
+++ b/tests/unit/providers/utils/inference/test_prompt_adapter.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
     OpenAIAssistantMessageParam,
     OpenAIUserMessageParam,
 )
diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py
index 63a9f2658..cc83c0037 100644
--- a/tests/unit/providers/utils/memory/test_vector_store.py
+++ b/tests/unit/providers/utils/memory/test_vector_store.py
@@ -7,8 +7,8 @@
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
-from llama_stack_api.apis.common.content_types import URL, TextContentItem
-from llama_stack_api.apis.tools import RAGDocument
+from llama_stack_api.common.content_types import URL, TextContentItem
+from llama_stack_api.rag_tool import RAGDocument
 
 from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc
diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py
index 3a04dedc9..53d1a199d 100644
--- a/tests/unit/providers/utils/test_model_registry.py
+++ b/tests/unit/providers/utils/test_model_registry.py
@@ -34,7 +34,7 @@
 #
 
 import pytest
-from llama_stack_api.apis.models import Model
+from llama_stack_api.models import Model
 
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py
index 6495a93e7..3bb1fac07 100644
--- a/tests/unit/providers/vector_io/conftest.py
+++ b/tests/unit/providers/vector_io/conftest.py
@@ -9,8 +9,8 @@
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import numpy as np
 import pytest
-from llama_stack_api.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
-from llama_stack_api.apis.vector_stores import VectorStore
+from llama_stack_api.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
+from llama_stack_api.vector_stores import VectorStore
 
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py
index c5117ebc2..e19bd19a9 100644
--- a/tests/unit/providers/vector_io/test_faiss.py
+++ b/tests/unit/providers/vector_io/test_faiss.py
@@ -9,10 +9,10 @@
 from unittest.mock import MagicMock, patch
 
 import numpy as np
 import pytest
-from llama_stack_api.apis.files import Files
-from llama_stack_api.apis.vector_io import Chunk, QueryChunksResponse
-from llama_stack_api.apis.vector_stores import VectorStore
-from llama_stack_api.providers.datatypes import HealthStatus
+from llama_stack_api.datatypes import HealthStatus
+from llama_stack_api.files import Files
+from llama_stack_api.vector_io import Chunk, QueryChunksResponse
+from llama_stack_api.vector_stores import VectorStore
 
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import (
diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py
index 722494851..45c237008 100644
--- a/tests/unit/providers/vector_io/test_sqlite_vec.py
+++ b/tests/unit/providers/vector_io/test_sqlite_vec.py
@@ -8,7 +8,7 @@
 import asyncio
 
 import numpy as np
 import pytest
-from llama_stack_api.apis.vector_io import Chunk, QueryChunksResponse
+from llama_stack_api.vector_io import Chunk, QueryChunksResponse
 
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
     SQLiteVecIndex,
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index 9bb66bb6a..ff2276990 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -10,8 +10,8 @@
 from unittest.mock import AsyncMock, patch
 
 import numpy as np
 import pytest
-from llama_stack_api.apis.common.errors import VectorStoreNotFoundError
-from llama_stack_api.apis.vector_io import (
+from llama_stack_api.common.errors import VectorStoreNotFoundError
+from llama_stack_api.vector_io import (
     Chunk,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     OpenAICreateVectorStoreRequestWithExtraBody,
@@ -19,7 +19,7 @@ from llama_stack_api.apis.vector_io import (
     VectorStoreChunkingStrategyAuto,
     VectorStoreFileObject,
 )
-from llama_stack_api.apis.vector_stores import VectorStore
+from llama_stack_api.vector_stores import VectorStore
 
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX
@@ -222,7 +222,7 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter):
 
 async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
     """Ensure no KeyError when document_id is missing or in different places."""
-    from llama_stack_api.apis.vector_io import Chunk, ChunkMetadata
+    from llama_stack_api.vector_io import Chunk, ChunkMetadata
 
     fake_index = AsyncMock()
     vector_io_adapter.cache["db1"] = fake_index
@@ -255,7 +255,7 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
 
 async def test_document_id_with_invalid_type_raises_error():
     """Ensure TypeError is raised when document_id is not a string."""
-    from llama_stack_api.apis.vector_io import Chunk
+    from llama_stack_api.vector_io import Chunk
 
     # Integer document_id should raise TypeError
     from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
diff --git a/tests/unit/providers/vector_io/test_vector_utils.py b/tests/unit/providers/vector_io/test_vector_utils.py
index 31f822e1d..11e2302b9 100644
--- a/tests/unit/providers/vector_io/test_vector_utils.py
+++ b/tests/unit/providers/vector_io/test_vector_utils.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api.apis.vector_io import Chunk, ChunkMetadata
+from llama_stack_api.vector_io import Chunk, ChunkMetadata
 
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py
index 2ad15b979..c538fbfbe 100644
--- a/tests/unit/rag/test_rag_query.py
+++ b/tests/unit/rag/test_rag_query.py
@@ -7,8 +7,8 @@
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
-from llama_stack_api.apis.tools.rag_tool import RAGQueryConfig
-from llama_stack_api.apis.vector_io import (
+from llama_stack_api.rag_tool import RAGQueryConfig
+from llama_stack_api.vector_io import (
     Chunk,
     ChunkMetadata,
     QueryChunksResponse,
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index afeda5bd6..9954d6049 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -12,12 +12,12 @@
 from unittest.mock import AsyncMock, MagicMock
 
 import numpy as np
 import pytest
-from llama_stack_api.apis.inference.inference import (
+from llama_stack_api.inference import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
 )
-from llama_stack_api.apis.tools import RAGDocument
-from llama_stack_api.apis.vector_io import Chunk
+from llama_stack_api.rag_tool import RAGDocument
+from llama_stack_api.vector_io import Chunk
 
 from llama_stack.providers.utils.memory.vector_store import (
     URL,
diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py
index 96a7bc184..fed0a1710 100644
--- a/tests/unit/registry/test_registry.py
+++ b/tests/unit/registry/test_registry.py
@@ -6,8 +6,8 @@
 
 import pytest
-from llama_stack_api.apis.inference import Model
-from llama_stack_api.apis.vector_stores import VectorStore
+from llama_stack_api.inference import Model
+from llama_stack_api.vector_stores import VectorStore
 
 from llama_stack.core.datatypes import VectorStoreWithOwner
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
@@ -304,7 +304,7 @@ async def test_double_registration_different_objects(disk_dist_registry):
 
 async def test_double_registration_with_cache(cached_disk_dist_registry):
     """Test double registration behavior with caching enabled."""
-    from llama_stack_api.apis.models import ModelType
+    from llama_stack_api.models import ModelType
 
     from llama_stack.core.datatypes import ModelWithOwner
diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py
index ec16f4421..40d3d5deb 100644
--- a/tests/unit/registry/test_registry_acl.py
+++ b/tests/unit/registry/test_registry_acl.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.models import ModelType
 
 from llama_stack.core.datatypes import ModelWithOwner, User
 from llama_stack.core.store.registry import CachedDiskDistributionRegistry
diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py
index 95fb94b01..ba161ac22 100644
--- a/tests/unit/server/test_access_control.py
+++ b/tests/unit/server/test_access_control.py
@@ -8,8 +8,8 @@
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
 import yaml
-from llama_stack_api.apis.datatypes import Api
-from llama_stack_api.apis.models import ModelType
+from llama_stack_api.datatypes import Api
+from llama_stack_api.models import ModelType
 from pydantic import TypeAdapter, ValidationError
 
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py
index 087570e82..25609ff36 100644
--- a/tests/unit/server/test_resolver.py
+++ b/tests/unit/server/test_resolver.py
@@ -9,8 +9,8 @@
 import sys
 from typing import Any, Protocol
 from unittest.mock import AsyncMock, MagicMock
 
-from llama_stack_api.apis.inference import Inference
-from llama_stack_api.providers.datatypes import InlineProviderSpec, ProviderSpec
+from llama_stack_api.datatypes import InlineProviderSpec, ProviderSpec
+from llama_stack_api.inference import Inference
 from pydantic import BaseModel, Field
 
 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py
index 3a9c67372..91cc8bbfb 100644
--- a/tests/unit/server/test_sse.py
+++ b/tests/unit/server/test_sse.py
@@ -9,7 +9,7 @@
 import logging  # allow-direct-logging
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
-from llama_stack_api.apis.common.responses import PaginatedResponse
+from llama_stack_api.common.responses import PaginatedResponse
 
 from llama_stack.core.server.server import create_dynamic_typed_route, create_sse_event, sse_generator
diff --git a/tests/unit/tools/test_tools_json_schema.py b/tests/unit/tools/test_tools_json_schema.py
index 7a682613e..366a06c17 100644
--- a/tests/unit/tools/test_tools_json_schema.py
+++ b/tests/unit/tools/test_tools_json_schema.py
@@ -9,7 +9,7 @@ Unit tests for JSON Schema-based tool definitions.
 Tests the new input_schema and output_schema fields.
 """
 
-from llama_stack_api.apis.tools import ToolDef
+from llama_stack_api.tools import ToolDef
 from pydantic import ValidationError
 
 from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition
diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py
index 9059aa9fb..f3b0ab6d0 100644
--- a/tests/unit/utils/inference/test_inference_store.py
+++ b/tests/unit/utils/inference/test_inference_store.py
@@ -7,7 +7,7 @@
 import time
 
 import pytest
-from llama_stack_api.apis.inference import (
+from llama_stack_api.inference import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChoice,
diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py
index ffe3dafa0..26b458a57 100644
--- a/tests/unit/utils/responses/test_responses_store.py
+++ b/tests/unit/utils/responses/test_responses_store.py
@@ -9,12 +9,12 @@
 from tempfile import TemporaryDirectory
 from uuid import uuid4
 
 import pytest
-from llama_stack_api.apis.agents import Order
-from llama_stack_api.apis.agents.openai_responses import (
+from llama_stack_api.agents import Order
+from llama_stack_api.inference import OpenAIMessageParam, OpenAIUserMessageParam
+from llama_stack_api.openai_responses import (
     OpenAIResponseInput,
     OpenAIResponseObject,
 )
-from llama_stack_api.apis.inference import OpenAIMessageParam, OpenAIUserMessageParam
 
 from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig
 from llama_stack.providers.utils.responses.responses_store import ResponsesStore
@@ -46,7 +46,7 @@ def create_test_response_object(
 
 def create_test_response_input(content: str, input_id: str) -> OpenAIResponseInput:
     """Helper to create a test response input."""
-    from llama_stack_api.apis.agents.openai_responses import OpenAIResponseMessage
+    from llama_stack_api.openai_responses import OpenAIResponseMessage
 
     return OpenAIResponseMessage(
         id=input_id,