From 2e5d1c8881f14c1178351905e760a1108eefc84d Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Thu, 13 Nov 2025 14:03:30 -0500 Subject: [PATCH] refactor: enforce top-level imports for llama-stack-api Enforce that all imports from llama-stack-api use the form: from llama_stack_api import <symbol> This prevents external code from accessing internal package structure (e.g., llama_stack_api.agents, llama_stack_api.common.*) and establishes a clear public API boundary. Changes: - Export 400+ symbols from llama_stack_api/__init__.py - Include all API types, common utilities, and strong_typing helpers - Update files across src/llama_stack, docs/, tests/, scripts/ - Convert all submodule imports to top-level imports - Ensure docs use the proper import structure Addresses PR review feedback requiring an explicit __all__ definition to prevent "peeking inside" the API package. Signed-off-by: Charlie Doern --- .github/workflows/python-build-test.yml | 4 +- docs/docs/concepts/apis/external.mdx | 9 +- .../providers/vector_io/inline_sqlite-vec.mdx | 4 +- docs/openapi_generator/generate.py | 2 +- docs/openapi_generator/pyopenapi/generator.py | 26 +- .../openapi_generator/pyopenapi/operations.py | 12 +- .../pyopenapi/specification.py | 2 +- docs/openapi_generator/pyopenapi/utility.py | 7 +- scripts/generate_prompt_format.py | 2 +- .../llama_stack_api/__init__.py | 862 +++++++++++++++++- src/llama_stack/cli/stack/_list_deps.py | 2 +- src/llama_stack/cli/stack/utils.py | 2 +- src/llama_stack/core/build.py | 2 +- src/llama_stack/core/client.py | 2 +- src/llama_stack/core/configure.py | 2 +- .../core/conversations/conversations.py | 2 +- src/llama_stack/core/datatypes.py | 41 +- src/llama_stack/core/distribution.py | 2 +- src/llama_stack/core/external.py | 2 +- src/llama_stack/core/inspect.py | 4 +- src/llama_stack/core/library_client.py | 2 +- src/llama_stack/core/prompts/prompts.py | 2 +- src/llama_stack/core/providers.py | 3 +- src/llama_stack/core/resolver.py | 50 +- src/llama_stack/core/routers/__init__.py | 2 +- src/llama_stack/core/routers/datasets.py | 5 +- src/llama_stack/core/routers/eval_scoring.py | 9 +- src/llama_stack/core/routers/inference.py | 11 +- src/llama_stack/core/routers/safety.py | 5 +- src/llama_stack/core/routers/tool_runtime.py | 4 +- src/llama_stack/core/routers/vector_io.py | 10 +- .../core/routing_tables/benchmarks.py | 2 +- src/llama_stack/core/routing_tables/common.py | 5 +- .../core/routing_tables/datasets.py | 6 +- src/llama_stack/core/routing_tables/models.py | 4 +- .../core/routing_tables/scoring_functions.py | 6 +- .../core/routing_tables/shields.py | 3 +- .../core/routing_tables/toolgroups.py | 12 +- .../core/routing_tables/vector_stores.py | 10 +- src/llama_stack/core/server/auth_providers.py | 2 +- src/llama_stack/core/server/routes.py | 3 +- src/llama_stack/core/server/server.py | 4 +- src/llama_stack/core/stack.py | 45 +- src/llama_stack/core/telemetry/telemetry.py | 2 +- src/llama_stack/distributions/dell/dell.py | 2 +- .../meta-reference-gpu/meta_reference.py | 2 +- .../open-benchmark/open_benchmark.py | 3 +- .../distributions/starter/starter.py | 2 +- src/llama_stack/distributions/template.py | 3 +- .../inline/agents/meta_reference/agents.py | 18 +- .../responses/openai_responses.py | 25 +- .../meta_reference/responses/streaming.py | 12 +- .../meta_reference/responses/tool_executor.py | 18 +- .../agents/meta_reference/responses/types.py | 6 +- .../agents/meta_reference/responses/utils.py | 16 +- .../inline/agents/meta_reference/safety.py | 3 +-
.../inline/batches/reference/__init__.py | 4 +- .../inline/batches/reference/batches.py | 14 +- .../inline/datasetio/localfs/datasetio.py | 5 +- .../inline/eval/meta_reference/eval.py | 21 +- .../providers/inline/files/localfs/files.py | 6 +- .../inline/inference/meta_reference/config.py | 2 +- .../inference/meta_reference/generators.py | 2 +- .../inference/meta_reference/inference.py | 11 +- .../sentence_transformers.py | 7 +- .../inline/post_training/common/validator.py | 6 +- .../huggingface/post_training.py | 6 +- .../recipes/finetune_single_device.py | 6 +- .../recipes/finetune_single_device_dpo.py | 6 +- .../inline/post_training/huggingface/utils.py | 3 +- .../post_training/torchtune/common/utils.py | 2 +- .../post_training/torchtune/post_training.py | 6 +- .../recipes/lora_finetuning_single_device.py | 8 +- .../safety/code_scanner/code_scanner.py | 6 +- .../inline/safety/llama_guard/llama_guard.py | 15 +- .../safety/prompt_guard/prompt_guard.py | 8 +- .../providers/inline/scoring/basic/scoring.py | 11 +- .../basic/scoring_fn/docvqa_scoring_fn.py | 3 +- .../basic/scoring_fn/equality_scoring_fn.py | 3 +- .../basic/scoring_fn/fn_defs/docvqa.py | 4 +- .../basic/scoring_fn/fn_defs/equality.py | 4 +- .../basic/scoring_fn/fn_defs/ifeval.py | 4 +- .../fn_defs/regex_parser_math_response.py | 4 +- .../regex_parser_multiple_choice_answer.py | 4 +- .../basic/scoring_fn/fn_defs/subset_of.py | 4 +- .../basic/scoring_fn/ifeval_scoring_fn.py | 3 +- .../regex_parser_math_response_scoring_fn.py | 3 +- .../scoring_fn/regex_parser_scoring_fn.py | 3 +- .../basic/scoring_fn/subset_of_scoring_fn.py | 3 +- .../inline/scoring/braintrust/braintrust.py | 11 +- .../scoring_fn/fn_defs/answer_correctness.py | 4 +- .../scoring_fn/fn_defs/answer_relevancy.py | 4 +- .../scoring_fn/fn_defs/answer_similarity.py | 4 +- .../fn_defs/context_entity_recall.py | 4 +- .../scoring_fn/fn_defs/context_precision.py | 4 +- .../scoring_fn/fn_defs/context_recall.py | 4 +- .../scoring_fn/fn_defs/context_relevancy.py | 4 +- .../scoring_fn/fn_defs/factuality.py | 4 +- .../scoring_fn/fn_defs/faithfulness.py | 4 +- .../inline/scoring/llm_as_judge/scoring.py | 13 +- .../fn_defs/llm_as_judge_405b_simpleqa.py | 4 +- .../scoring_fn/fn_defs/llm_as_judge_base.py | 3 +- .../scoring_fn/llm_as_judge_scoring_fn.py | 4 +- .../inline/tool_runtime/rag/__init__.py | 2 +- .../tool_runtime/rag/context_retriever.py | 7 +- .../inline/tool_runtime/rag/memory.py | 21 +- .../inline/vector_io/chroma/__init__.py | 2 +- .../inline/vector_io/chroma/config.py | 2 +- .../inline/vector_io/faiss/__init__.py | 2 +- .../inline/vector_io/faiss/config.py | 2 +- .../providers/inline/vector_io/faiss/faiss.py | 19 +- .../inline/vector_io/milvus/__init__.py | 2 +- .../inline/vector_io/milvus/config.py | 2 +- .../inline/vector_io/qdrant/__init__.py | 2 +- .../inline/vector_io/qdrant/config.py | 2 +- .../inline/vector_io/sqlite_vec/__init__.py | 2 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 16 +- src/llama_stack/providers/registry/agents.py | 2 +- src/llama_stack/providers/registry/batches.py | 2 +- .../providers/registry/datasetio.py | 2 +- src/llama_stack/providers/registry/eval.py | 2 +- src/llama_stack/providers/registry/files.py | 2 +- .../providers/registry/inference.py | 2 +- .../providers/registry/post_training.py | 2 +- src/llama_stack/providers/registry/safety.py | 2 +- src/llama_stack/providers/registry/scoring.py | 2 +- .../providers/registry/tool_runtime.py | 2 +- .../providers/registry/vector_io.py | 4 +- .../datasetio/huggingface/huggingface.py | 5 +- 
.../remote/datasetio/nvidia/datasetio.py | 5 +- .../providers/remote/eval/nvidia/eval.py | 24 +- .../providers/remote/files/openai/files.py | 6 +- .../providers/remote/files/s3/files.py | 6 +- .../remote/inference/anthropic/config.py | 2 +- .../remote/inference/azure/config.py | 2 +- .../remote/inference/bedrock/bedrock.py | 2 +- .../remote/inference/cerebras/cerebras.py | 2 +- .../remote/inference/cerebras/config.py | 2 +- .../remote/inference/databricks/config.py | 2 +- .../remote/inference/databricks/databricks.py | 2 +- .../remote/inference/fireworks/config.py | 2 +- .../remote/inference/gemini/config.py | 2 +- .../remote/inference/gemini/gemini.py | 2 +- .../providers/remote/inference/groq/config.py | 2 +- .../inference/llama_openai_compat/config.py | 2 +- .../inference/llama_openai_compat/llama.py | 2 +- .../remote/inference/nvidia/__init__.py | 2 +- .../remote/inference/nvidia/config.py | 2 +- .../remote/inference/nvidia/nvidia.py | 5 +- .../remote/inference/oci/__init__.py | 2 +- .../providers/remote/inference/oci/config.py | 2 +- .../providers/remote/inference/oci/oci.py | 4 +- .../remote/inference/ollama/ollama.py | 6 +- .../remote/inference/openai/config.py | 2 +- .../remote/inference/passthrough/config.py | 2 +- .../inference/passthrough/passthrough.py | 4 +- .../remote/inference/runpod/config.py | 2 +- .../remote/inference/runpod/runpod.py | 2 +- .../remote/inference/sambanova/config.py | 2 +- .../providers/remote/inference/tgi/config.py | 2 +- .../providers/remote/inference/tgi/tgi.py | 2 +- .../remote/inference/together/config.py | 2 +- .../remote/inference/together/together.py | 4 +- .../remote/inference/vertexai/config.py | 2 +- .../providers/remote/inference/vllm/config.py | 2 +- .../providers/remote/inference/vllm/vllm.py | 4 +- .../remote/inference/watsonx/config.py | 2 +- .../remote/inference/watsonx/watsonx.py | 7 +- .../post_training/nvidia/post_training.py | 2 +- .../remote/post_training/nvidia/utils.py | 2 +- .../remote/safety/bedrock/bedrock.py | 8 +- .../providers/remote/safety/bedrock/config.py | 2 +- .../providers/remote/safety/nvidia/config.py | 2 +- .../providers/remote/safety/nvidia/nvidia.py | 14 +- .../remote/safety/sambanova/config.py | 2 +- .../remote/safety/sambanova/sambanova.py | 8 +- .../tool_runtime/bing_search/bing_search.py | 6 +- .../tool_runtime/brave_search/brave_search.py | 6 +- .../model_context_protocol.py | 7 +- .../tavily_search/tavily_search.py | 6 +- .../wolfram_alpha/wolfram_alpha.py | 6 +- .../remote/vector_io/chroma/__init__.py | 2 +- .../remote/vector_io/chroma/chroma.py | 15 +- .../remote/vector_io/chroma/config.py | 2 +- .../remote/vector_io/milvus/__init__.py | 2 +- .../remote/vector_io/milvus/config.py | 2 +- .../remote/vector_io/milvus/milvus.py | 17 +- .../remote/vector_io/pgvector/__init__.py | 2 +- .../remote/vector_io/pgvector/config.py | 2 +- .../remote/vector_io/pgvector/pgvector.py | 17 +- .../remote/vector_io/qdrant/__init__.py | 2 +- .../remote/vector_io/qdrant/config.py | 2 +- .../remote/vector_io/qdrant/qdrant.py | 13 +- .../remote/vector_io/weaviate/__init__.py | 2 +- .../remote/vector_io/weaviate/config.py | 2 +- .../remote/vector_io/weaviate/weaviate.py | 18 +- .../utils/common/data_schema_validator.py | 6 +- .../providers/utils/files/form_data.py | 2 +- .../utils/inference/embedding_mixin.py | 2 +- .../utils/inference/inference_store.py | 2 +- .../utils/inference/litellm_openai_mixin.py | 2 +- .../utils/inference/model_registry.py | 4 +- .../utils/inference/openai_compat.py | 10 +- 
.../providers/utils/inference/openai_mixin.py | 4 +- .../utils/inference/prompt_adapter.py | 8 +- .../providers/utils/kvstore/sqlite/config.py | 2 +- .../providers/utils/memory/file_utils.py | 2 +- .../utils/memory/openai_vector_store_mixin.py | 9 +- .../providers/utils/memory/vector_store.py | 14 +- src/llama_stack/providers/utils/pagination.py | 2 +- .../utils/responses/responses_store.py | 8 +- .../utils/scoring/aggregation_utils.py | 3 +- .../utils/scoring/base_scoring_fn.py | 3 +- .../providers/utils/sqlstore/api.py | 2 +- .../utils/sqlstore/sqlalchemy_sqlstore.py | 2 +- src/llama_stack/providers/utils/tools/mcp.py | 8 +- .../src/llama_stack_api_weather/weather.py | 4 +- tests/integration/batches/conftest.py | 2 +- tests/integration/files/test_files.py | 2 +- .../inference/test_provider_data_routing.py | 4 +- .../post_training/test_post_training.py | 2 +- tests/integration/safety/test_llama_guard.py | 2 +- tests/integration/safety/test_safety.py | 2 +- .../integration/safety/test_vision_safety.py | 2 +- .../tool_runtime/test_registration.py | 2 +- .../vector_io/test_openai_vector_stores.py | 19 +- tests/integration/vector_io/test_vector_io.py | 2 +- tests/unit/conversations/test_api_models.py | 6 +- .../unit/conversations/test_conversations.py | 5 +- tests/unit/core/routers/test_safety_router.py | 3 +- tests/unit/core/routers/test_vector_io.py | 2 +- tests/unit/core/test_stack_validation.py | 4 +- .../routers/test_routing_tables.py | 23 +- .../unit/distribution/test_api_recordings.py | 2 +- tests/unit/distribution/test_distribution.py | 16 +- tests/unit/files/test_files.py | 4 +- .../unit/providers/batches/test_reference.py | 3 +- .../batches/test_reference_idempotency.py | 2 +- tests/unit/providers/files/test_s3_files.py | 11 +- .../providers/files/test_s3_files_auth.py | 3 +- .../inference/test_bedrock_adapter.py | 2 +- .../providers/inference/test_remote_vllm.py | 6 +- .../responses/test_streaming.py | 2 +- tests/unit/providers/nvidia/test_datastore.py | 3 +- tests/unit/providers/nvidia/test_eval.py | 16 +- .../unit/providers/nvidia/test_parameters.py | 2 +- .../providers/nvidia/test_rerank_inference.py | 2 +- tests/unit/providers/nvidia/test_safety.py | 9 +- .../nvidia/test_supervised_fine_tuning.py | 2 +- tests/unit/providers/test_bedrock.py | 2 +- .../utils/inference/test_openai_mixin.py | 3 +- .../utils/inference/test_prompt_adapter.py | 5 +- .../utils/memory/test_vector_store.py | 3 +- .../providers/utils/test_model_registry.py | 2 +- tests/unit/providers/vector_io/conftest.py | 3 +- tests/unit/providers/vector_io/test_faiss.py | 5 +- .../providers/vector_io/test_sqlite_vec.py | 2 +- .../test_vector_io_openai_vector_stores.py | 10 +- .../providers/vector_io/test_vector_utils.py | 2 +- tests/unit/rag/test_rag_query.py | 7 +- tests/unit/rag/test_vector_store.py | 7 +- tests/unit/registry/test_registry.py | 5 +- tests/unit/registry/test_registry_acl.py | 2 +- tests/unit/server/test_access_control.py | 3 +- tests/unit/server/test_auth.py | 2 +- tests/unit/server/test_resolver.py | 3 +- tests/unit/server/test_sse.py | 2 +- tests/unit/tools/test_tools_json_schema.py | 2 +- .../utils/inference/test_inference_store.py | 2 +- .../utils/responses/test_responses_store.py | 9 +- 270 files changed, 1587 insertions(+), 750 deletions(-) diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml index 13cbe4ecc..b0f2c6e69 100644 --- a/.github/workflows/python-build-test.yml +++ b/.github/workflows/python-build-test.yml @@ -30,14 +30,14 @@ jobs: 
activate-environment: true version: 0.7.6 - - name: Build Llama Stack Spec package + - name: Build Llama Stack API package working-directory: src/llama-stack-api run: uv build - name: Build Llama Stack package run: uv build - - name: Install Llama Stack package (with spec from local build) + - name: Install Llama Stack package (with api stubs from local build) run: | uv pip install --find-links src/llama-stack-api/dist dist/*.whl diff --git a/docs/docs/concepts/apis/external.mdx b/docs/docs/concepts/apis/external.mdx index 071cc20ff..005b85647 100644 --- a/docs/docs/concepts/apis/external.mdx +++ b/docs/docs/concepts/apis/external.mdx @@ -58,7 +58,7 @@ External APIs must expose a `available_providers()` function in their module tha ```python # llama_stack_api_weather/api.py -from llama_stack_api.providers.datatypes import Api, InlineProviderSpec, ProviderSpec +from llama_stack_api import Api, InlineProviderSpec, ProviderSpec def available_providers() -> list[ProviderSpec]: @@ -79,7 +79,7 @@ A Protocol class like so: # llama_stack_api_weather/api.py from typing import Protocol -from llama_stack_api.schema_utils import webmethod +from llama_stack_api import webmethod class WeatherAPI(Protocol): @@ -151,13 +151,12 @@ __all__ = ["WeatherAPI", "available_providers"] # llama-stack-api-weather/src/llama_stack_api_weather/weather.py from typing import Protocol -from llama_stack_api.providers.datatypes import ( +from llama_stack_api import ( Api, ProviderSpec, RemoteProviderSpec, + webmethod, ) -from llama_stack_api.schema_utils import webmethod - def available_providers() -> list[ProviderSpec]: return [ diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx index ec054c7e6..45631dff3 100644 --- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx @@ -153,7 +153,7 @@ description: | Example using RAGQueryConfig with different search modes: ```python - from llama_stack_api.rag_tool import RAGQueryConfig, RRFRanker, WeightedRanker + from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker # Vector search config = RAGQueryConfig(mode="vector", max_chunks=5) @@ -358,7 +358,7 @@ Two ranker types are supported: Example using RAGQueryConfig with different search modes: ```python -from llama_stack_api.rag_tool import RAGQueryConfig, RRFRanker, WeightedRanker +from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker # Vector search config = RAGQueryConfig(mode="vector", max_chunks=5) diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py index 0bcdee4bb..769db32a7 100644 --- a/docs/openapi_generator/generate.py +++ b/docs/openapi_generator/generate.py @@ -16,7 +16,7 @@ import sys import fire import ruamel.yaml as yaml -from llama_stack_api.version import LLAMA_STACK_API_V1 # noqa: E402 +from llama_stack_api import LLAMA_STACK_API_V1 # noqa: E402 from llama_stack.core.stack import LlamaStack # noqa: E402 from .pyopenapi.options import Options # noqa: E402 diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 9b5c375d0..afbb5c710 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -16,27 +16,27 @@ from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union from fastapi import UploadFile -from llama_stack_api.datatypes import Error -from llama_stack_api.strong_typing.core import JsonType -from 
llama_stack_api.strong_typing.docstring import Docstring, parse_type -from llama_stack_api.strong_typing.inspection import ( +from llama_stack_api import ( + Docstring, + Error, + JsonSchemaGenerator, + JsonType, + Schema, + SchemaOptions, + get_schema_identifier, is_generic_list, is_type_optional, is_type_union, is_unwrapped_body_param, + json_dump_string, + object_to_json, + parse_type, + python_type_to_name, + register_schema, unwrap_generic_list, unwrap_optional_type, unwrap_union_types, ) -from llama_stack_api.strong_typing.name import python_type_to_name -from llama_stack_api.strong_typing.schema import ( - get_schema_identifier, - JsonSchemaGenerator, - register_schema, - Schema, - SchemaOptions, -) -from llama_stack_api.strong_typing.serialization import json_dump_string, object_to_json from pydantic import BaseModel from .operations import ( diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py index 65881df43..42a554f2c 100644 --- a/docs/openapi_generator/pyopenapi/operations.py +++ b/docs/openapi_generator/pyopenapi/operations.py @@ -11,19 +11,21 @@ import typing from dataclasses import dataclass from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union -from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA - from termcolor import colored -from llama_stack_api.strong_typing.inspection import get_signature - from typing import get_origin, get_args from fastapi import UploadFile from fastapi.params import File, Form from typing import Annotated -from llama_stack_api.schema_utils import ExtraBodyField +from llama_stack_api import ( + ExtraBodyField, + LLAMA_STACK_API_V1, + LLAMA_STACK_API_V1ALPHA, + LLAMA_STACK_API_V1BETA, + get_signature, +) def split_prefix( diff --git a/docs/openapi_generator/pyopenapi/specification.py b/docs/openapi_generator/pyopenapi/specification.py index 6b0af5a05..bfa35f539 100644 --- a/docs/openapi_generator/pyopenapi/specification.py +++ b/docs/openapi_generator/pyopenapi/specification.py @@ -9,7 +9,7 @@ import enum from dataclasses import dataclass from typing import Any, ClassVar, Dict, List, Optional, Union -from llama_stack_api.strong_typing.schema import JsonType, Schema, StrictJsonType +from llama_stack_api import JsonType, Schema, StrictJsonType URL = str diff --git a/docs/openapi_generator/pyopenapi/utility.py b/docs/openapi_generator/pyopenapi/utility.py index 5770a551e..762249eb8 100644 --- a/docs/openapi_generator/pyopenapi/utility.py +++ b/docs/openapi_generator/pyopenapi/utility.py @@ -11,8 +11,7 @@ from pathlib import Path from typing import Any, List, Optional, TextIO, Union, get_type_hints, get_origin, get_args from pydantic import BaseModel -from llama_stack_api.strong_typing.schema import object_to_json, StrictJsonType -from llama_stack_api.strong_typing.inspection import is_unwrapped_body_param +from llama_stack_api import StrictJsonType, is_unwrapped_body_param, object_to_json from llama_stack.core.resolver import api_protocol_map from .generator import Generator @@ -165,12 +164,12 @@ def _validate_api_delete_method_returns_none(method) -> str | None: return "has no return type annotation" return_type = hints['return'] - + # Allow OpenAI endpoints to return response objects since they follow OpenAI specification method_name = getattr(method, '__name__', '') if method_name.__contains__('openai_'): return None - + if return_type is not None and return_type is not type(None): return "does not return None 
where None is mandatory" diff --git a/scripts/generate_prompt_format.py b/scripts/generate_prompt_format.py index 033c3d572..8099a3f0d 100755 --- a/scripts/generate_prompt_format.py +++ b/scripts/generate_prompt_format.py @@ -14,7 +14,7 @@ import os from pathlib import Path import fire -from llama_stack_api.common.errors import ModelNotFoundError +from llama_stack_api import ModelNotFoundError from llama_stack.models.llama.llama3.generation import Llama3 from llama_stack.models.llama.llama4.generation import Llama4 diff --git a/src/llama-stack-api/llama_stack_api/__init__.py b/src/llama-stack-api/llama_stack_api/__init__.py index beff66128..8bbe9f8bd 100644 --- a/src/llama-stack-api/llama_stack_api/__init__.py +++ b/src/llama-stack-api/llama_stack_api/__init__.py @@ -12,16 +12,860 @@ for Llama Stack. It is designed to be a lightweight dependency for external prov and clients that need to interact with Llama Stack APIs without requiring the full server implementation. -Key components: -- API modules (agents, inference, safety, etc.): Protocol definitions for all Llama Stack APIs -- datatypes: Core data types and provider specifications -- common: Common data types used across APIs -- strong_typing: Type system utilities -- schema_utils: Schema validation and utilities +All imports from this package MUST use the form: + from llama_stack_api import <symbol> + +Sub-module imports (e.g., from llama_stack_api.agents import Agents) are NOT supported +and are considered a code smell. All exported symbols are explicitly listed in __all__. """ -__version__ = "0.1.0" +__version__ = "0.4.0" -from . import common, datatypes, schema_utils, strong_typing # noqa: F401 +# Import submodules for those who need them +from . import common, strong_typing # noqa: F401 -__all__ = ["common", "datatypes", "schema_utils", "strong_typing"] +# Import all public API symbols +from .agents import Agents, ResponseGuardrail, ResponseGuardrailSpec +from .batches import Batches, BatchObject, ListBatchesResponse +from .benchmarks import ( + Benchmark, + BenchmarkInput, + Benchmarks, + CommonBenchmarkFields, + ListBenchmarksResponse, +) + +# Import commonly used types from common submodule +from .common.content_types import ( + URL, + ImageContentItem, + InterleavedContent, + InterleavedContentItem, + TextContentItem, + _URLOrData, +) +from .common.errors import ( + ConflictError, + DatasetNotFoundError, + InvalidConversationIdError, + ModelNotFoundError, + ModelTypeError, + ResourceNotFoundError, + TokenValidationError, + ToolGroupNotFoundError, + UnsupportedModelError, + VectorStoreNotFoundError, +) +from .common.job_types import Job, JobStatus +from .common.responses import Order, PaginatedResponse +from .common.training_types import Checkpoint, PostTrainingMetric +from .common.type_system import ( + ChatCompletionInputType, + CompletionInputType, + NumberType, + ParamType, + StringType, +) +from .conversations import ( + Conversation, + ConversationDeletedResource, + ConversationItem, + ConversationItemCreateRequest, + ConversationItemDeletedResource, + ConversationItemInclude, + ConversationItemList, + ConversationMessage, + Conversations, + Metadata, +) +from .datasetio import DatasetIO, DatasetStore +from .datasets import ( + CommonDatasetFields, + Dataset, + DatasetInput, + DatasetPurpose, + Datasets, + DatasetType, + DataSource, + ListDatasetsResponse, + RowsDataSource, + URIDataSource, +) +from .datatypes import ( + Api, + BenchmarksProtocolPrivate, + DatasetsProtocolPrivate, + DynamicApiMeta, + Error, + ExternalApiSpec, +
HealthResponse, + HealthStatus, + InlineProviderSpec, + ModelsProtocolPrivate, + ProviderSpec, + RemoteProviderConfig, + RemoteProviderSpec, + RoutingTable, + ScoringFunctionsProtocolPrivate, + ShieldsProtocolPrivate, + ToolGroupsProtocolPrivate, + VectorStoresProtocolPrivate, +) +from .eval import BenchmarkConfig, Eval, EvalCandidate, EvaluateResponse, ModelCandidate +from .files import ( + ExpiresAfter, + Files, + ListOpenAIFileResponse, + OpenAIFileDeleteResponse, + OpenAIFileObject, + OpenAIFilePurpose, +) +from .inference import ( + Bf16QuantizationConfig, + ChatCompletionResponseEventType, + CompletionRequest, + EmbeddingsResponse, + EmbeddingTaskType, + Fp8QuantizationConfig, + GrammarResponseFormat, + GreedySamplingStrategy, + Inference, + InferenceProvider, + Int4QuantizationConfig, + JsonSchemaResponseFormat, + ListOpenAIChatCompletionResponse, + LogProbConfig, + ModelStore, + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionMessageContent, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIChatCompletionTextOnlyMessageContent, + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + OpenAIChatCompletionUsage, + OpenAIChatCompletionUsageCompletionTokensDetails, + OpenAIChatCompletionUsagePromptTokensDetails, + OpenAIChoice, + OpenAIChoiceDelta, + OpenAIChoiceLogprobs, + OpenAIChunkChoice, + OpenAICompletion, + OpenAICompletionChoice, + OpenAICompletionLogprobs, + OpenAICompletionRequestWithExtraBody, + OpenAICompletionWithInputMessages, + OpenAIDeveloperMessageParam, + OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIFile, + OpenAIFileFile, + OpenAIImageURL, + OpenAIJSONSchema, + OpenAIMessageParam, + OpenAIResponseFormatJSONObject, + OpenAIResponseFormatJSONSchema, + OpenAIResponseFormatParam, + OpenAIResponseFormatText, + OpenAISystemMessageParam, + OpenAITokenLogProb, + OpenAIToolMessageParam, + OpenAITopLogProb, + OpenAIUserMessageParam, + QuantizationConfig, + QuantizationType, + RerankData, + RerankResponse, + ResponseFormat, + ResponseFormatType, + SamplingParams, + SamplingStrategy, + SystemMessage, + SystemMessageBehavior, + TextTruncation, + TokenLogProbs, + ToolChoice, + ToolResponseMessage, + TopKSamplingStrategy, + TopPSamplingStrategy, + UserMessage, +) +from .inspect import ( + ApiFilter, + HealthInfo, + Inspect, + ListRoutesResponse, + RouteInfo, + VersionInfo, +) +from .models import ( + CommonModelFields, + ListModelsResponse, + Model, + ModelInput, + Models, + ModelType, + OpenAIListModelsResponse, + OpenAIModel, +) +from .openai_responses import ( + AllowedToolsFilter, + ApprovalFilter, + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + MCPListToolsTool, + OpenAIDeleteResponseObject, + OpenAIResponseAnnotationCitation, + OpenAIResponseAnnotationContainerFileCitation, + OpenAIResponseAnnotationFileCitation, + OpenAIResponseAnnotationFilePath, + OpenAIResponseAnnotations, + OpenAIResponseContentPart, + OpenAIResponseContentPartOutputText, + OpenAIResponseContentPartReasoningSummary, + OpenAIResponseContentPartReasoningText, + OpenAIResponseContentPartRefusal, + OpenAIResponseError, + OpenAIResponseInput, + OpenAIResponseInputFunctionToolCallOutput, + OpenAIResponseInputMessageContent, + OpenAIResponseInputMessageContentFile, + OpenAIResponseInputMessageContentImage, + 
OpenAIResponseInputMessageContentText, + OpenAIResponseInputTool, + OpenAIResponseInputToolFileSearch, + OpenAIResponseInputToolFunction, + OpenAIResponseInputToolMCP, + OpenAIResponseInputToolWebSearch, + OpenAIResponseMCPApprovalRequest, + OpenAIResponseMCPApprovalResponse, + OpenAIResponseMessage, + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseObjectStreamResponseCompleted, + OpenAIResponseObjectStreamResponseContentPartAdded, + OpenAIResponseObjectStreamResponseContentPartDone, + OpenAIResponseObjectStreamResponseCreated, + OpenAIResponseObjectStreamResponseFailed, + OpenAIResponseObjectStreamResponseFileSearchCallCompleted, + OpenAIResponseObjectStreamResponseFileSearchCallInProgress, + OpenAIResponseObjectStreamResponseFileSearchCallSearching, + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta, + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone, + OpenAIResponseObjectStreamResponseIncomplete, + OpenAIResponseObjectStreamResponseInProgress, + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta, + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone, + OpenAIResponseObjectStreamResponseMcpCallCompleted, + OpenAIResponseObjectStreamResponseMcpCallFailed, + OpenAIResponseObjectStreamResponseMcpCallInProgress, + OpenAIResponseObjectStreamResponseMcpListToolsCompleted, + OpenAIResponseObjectStreamResponseMcpListToolsFailed, + OpenAIResponseObjectStreamResponseMcpListToolsInProgress, + OpenAIResponseObjectStreamResponseOutputItemAdded, + OpenAIResponseObjectStreamResponseOutputItemDone, + OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded, + OpenAIResponseObjectStreamResponseOutputTextDelta, + OpenAIResponseObjectStreamResponseOutputTextDone, + OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded, + OpenAIResponseObjectStreamResponseReasoningSummaryPartDone, + OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta, + OpenAIResponseObjectStreamResponseReasoningSummaryTextDone, + OpenAIResponseObjectStreamResponseReasoningTextDelta, + OpenAIResponseObjectStreamResponseReasoningTextDone, + OpenAIResponseObjectStreamResponseRefusalDelta, + OpenAIResponseObjectStreamResponseRefusalDone, + OpenAIResponseObjectStreamResponseWebSearchCallCompleted, + OpenAIResponseObjectStreamResponseWebSearchCallInProgress, + OpenAIResponseObjectStreamResponseWebSearchCallSearching, + OpenAIResponseObjectWithInput, + OpenAIResponseOutput, + OpenAIResponseOutputMessageContent, + OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageFileSearchToolCall, + OpenAIResponseOutputMessageFileSearchToolCallResults, + OpenAIResponseOutputMessageFunctionToolCall, + OpenAIResponseOutputMessageMCPCall, + OpenAIResponseOutputMessageMCPListTools, + OpenAIResponseOutputMessageWebSearchToolCall, + OpenAIResponsePrompt, + OpenAIResponseText, + OpenAIResponseTextFormat, + OpenAIResponseTool, + OpenAIResponseToolMCP, + OpenAIResponseUsage, + OpenAIResponseUsageInputTokensDetails, + OpenAIResponseUsageOutputTokensDetails, + WebSearchToolTypes, +) +from .post_training import ( + AlgorithmConfig, + DataConfig, + DatasetFormat, + DPOAlignmentConfig, + DPOLossType, + EfficiencyConfig, + ListPostTrainingJobsResponse, + LoraFinetuningConfig, + OptimizerConfig, + OptimizerType, + PostTraining, + PostTrainingJob, + PostTrainingJobArtifactsResponse, + PostTrainingJobLogStream, + PostTrainingJobStatusResponse, + PostTrainingRLHFRequest, + QATFinetuningConfig, + RLHFAlgorithm, + TrainingConfig, +) +from .prompts import ListPromptsResponse, Prompt, 
Prompts +from .providers import ListProvidersResponse, ProviderInfo, Providers +from .rag_tool import ( + DefaultRAGQueryGeneratorConfig, + LLMRAGQueryGeneratorConfig, + RAGDocument, + RAGQueryConfig, + RAGQueryGenerator, + RAGQueryGeneratorConfig, + RAGQueryResult, + RAGSearchMode, + Ranker, + RRFRanker, + WeightedRanker, +) +from .resource import Resource, ResourceType +from .safety import ( + ModerationObject, + ModerationObjectResults, + RunShieldResponse, + Safety, + SafetyViolation, + ShieldStore, + ViolationLevel, +) +from .schema_utils import ( + CallableT, + ExtraBodyField, + WebMethod, + json_schema_type, + register_schema, + webmethod, +) +from .scoring import ( + ScoreBatchResponse, + ScoreResponse, + Scoring, + ScoringFunctionStore, + ScoringResult, + ScoringResultRow, +) +from .scoring_functions import ( + AggregationFunctionType, + BasicScoringFnParams, + CommonScoringFnFields, + ListScoringFunctionsResponse, + LLMAsJudgeScoringFnParams, + RegexParserScoringFnParams, + ScoringFn, + ScoringFnInput, + ScoringFnParams, + ScoringFnParamsType, + ScoringFunctions, +) +from .shields import ( + CommonShieldFields, + ListShieldsResponse, + Shield, + ShieldInput, + Shields, +) + +# Import from strong_typing +from .strong_typing.core import JsonType +from .strong_typing.docstring import Docstring, parse_type +from .strong_typing.inspection import ( + get_signature, + is_generic_list, + is_type_optional, + is_type_union, + is_unwrapped_body_param, + unwrap_generic_list, + unwrap_optional_type, + unwrap_union_types, +) +from .strong_typing.name import python_type_to_name +from .strong_typing.schema import ( + JsonSchemaGenerator, + Schema, + SchemaOptions, + StrictJsonType, + get_schema_identifier, +) +from .strong_typing.serialization import json_dump_string, object_to_json +from .tools import ( + ListToolDefsResponse, + ListToolGroupsResponse, + SpecialToolGroup, + ToolDef, + ToolGroup, + ToolGroupInput, + ToolGroups, + ToolInvocationResult, + ToolRuntime, + ToolStore, +) +from .vector_io import ( + Chunk, + ChunkMetadata, + OpenAICreateVectorStoreFileBatchRequestWithExtraBody, + OpenAICreateVectorStoreRequestWithExtraBody, + QueryChunksResponse, + SearchRankingOptions, + VectorIO, + VectorStoreChunkingStrategy, + VectorStoreChunkingStrategyAuto, + VectorStoreChunkingStrategyStatic, + VectorStoreChunkingStrategyStaticConfig, + VectorStoreContent, + VectorStoreCreateRequest, + VectorStoreDeleteResponse, + VectorStoreFileBatchObject, + VectorStoreFileContentResponse, + VectorStoreFileCounts, + VectorStoreFileDeleteResponse, + VectorStoreFileLastError, + VectorStoreFileObject, + VectorStoreFilesListInBatchResponse, + VectorStoreFileStatus, + VectorStoreListFilesResponse, + VectorStoreListResponse, + VectorStoreModifyRequest, + VectorStoreObject, + VectorStoreSearchRequest, + VectorStoreSearchResponse, + VectorStoreSearchResponsePage, + VectorStoreTable, +) +from .vector_stores import VectorStore, VectorStoreInput +from .version import ( + LLAMA_STACK_API_V1, + LLAMA_STACK_API_V1ALPHA, + LLAMA_STACK_API_V1BETA, +) + +__all__ = [ + # Submodules + "common", + "strong_typing", + # Version constants + "LLAMA_STACK_API_V1", + "LLAMA_STACK_API_V1ALPHA", + "LLAMA_STACK_API_V1BETA", + # API Symbols + "Agents", + "AggregationFunctionType", + "AlgorithmConfig", + "AllowedToolsFilter", + "Api", + "ApiFilter", + "ApprovalFilter", + "BasicScoringFnParams", + "Batches", + "BatchObject", + "Benchmark", + "BenchmarkConfig", + "BenchmarkInput", + "Benchmarks", + "BenchmarksProtocolPrivate", + 
"Bf16QuantizationConfig", + "CallableT", + "ChatCompletionInputType", + "ChatCompletionResponseEventType", + "Checkpoint", + "Chunk", + "ChunkMetadata", + "CommonBenchmarkFields", + "ConflictError", + "CommonDatasetFields", + "CommonModelFields", + "CommonScoringFnFields", + "CommonShieldFields", + "CompletionInputType", + "CompletionRequest", + "Conversation", + "ConversationDeletedResource", + "ConversationItem", + "ConversationItemCreateRequest", + "ConversationItemDeletedResource", + "ConversationItemInclude", + "ConversationItemList", + "ConversationMessage", + "Conversations", + "DPOAlignmentConfig", + "DPOLossType", + "DataConfig", + "DataSource", + "Dataset", + "DatasetFormat", + "DatasetIO", + "DatasetInput", + "DatasetPurpose", + "DatasetNotFoundError", + "DatasetStore", + "DatasetType", + "Datasets", + "DatasetsProtocolPrivate", + "DefaultRAGQueryGeneratorConfig", + "Docstring", + "DynamicApiMeta", + "EfficiencyConfig", + "EmbeddingTaskType", + "EmbeddingsResponse", + "Error", + "Eval", + "EvalCandidate", + "EvaluateResponse", + "ExpiresAfter", + "ExternalApiSpec", + "ExtraBodyField", + "Files", + "Fp8QuantizationConfig", + "get_schema_identifier", + "get_signature", + "GrammarResponseFormat", + "GreedySamplingStrategy", + "HealthInfo", + "HealthResponse", + "HealthStatus", + "ImageContentItem", + "Inference", + "InferenceProvider", + "InlineProviderSpec", + "Inspect", + "Int4QuantizationConfig", + "InterleavedContent", + "InterleavedContentItem", + "InvalidConversationIdError", + "is_generic_list", + "is_type_optional", + "is_type_union", + "is_unwrapped_body_param", + "Job", + "JobStatus", + "json_dump_string", + "json_schema_type", + "JsonSchemaGenerator", + "JsonSchemaResponseFormat", + "JsonType", + "LLMAsJudgeScoringFnParams", + "LLMRAGQueryGeneratorConfig", + "ListBatchesResponse", + "ListBenchmarksResponse", + "ListDatasetsResponse", + "ListModelsResponse", + "ListOpenAIChatCompletionResponse", + "ListOpenAIFileResponse", + "ListOpenAIResponseInputItem", + "ListOpenAIResponseObject", + "ListPostTrainingJobsResponse", + "ListPromptsResponse", + "ListProvidersResponse", + "ListRoutesResponse", + "ListScoringFunctionsResponse", + "ListShieldsResponse", + "ListToolDefsResponse", + "ListToolGroupsResponse", + "LogProbConfig", + "LoraFinetuningConfig", + "MCPListToolsTool", + "Metadata", + "Model", + "ModelCandidate", + "ModelInput", + "ModelNotFoundError", + "ModelStore", + "ModelType", + "ModelTypeError", + "Models", + "ModelsProtocolPrivate", + "ModerationObject", + "ModerationObjectResults", + "NumberType", + "object_to_json", + "OpenAIAssistantMessageParam", + "OpenAIChatCompletion", + "OpenAIChatCompletionChunk", + "OpenAIChatCompletionContentPartImageParam", + "OpenAIChatCompletionContentPartParam", + "OpenAIChatCompletionContentPartTextParam", + "OpenAIChatCompletionMessageContent", + "OpenAIChatCompletionRequestWithExtraBody", + "OpenAIChatCompletionTextOnlyMessageContent", + "OpenAIChatCompletionToolCall", + "OpenAIChatCompletionToolCallFunction", + "OpenAIChatCompletionUsage", + "OpenAIChatCompletionUsageCompletionTokensDetails", + "OpenAIChatCompletionUsagePromptTokensDetails", + "OpenAIChoice", + "OpenAIChoiceDelta", + "OpenAIChoiceLogprobs", + "OpenAIChunkChoice", + "OpenAICompletion", + "OpenAICompletionChoice", + "OpenAICompletionLogprobs", + "OpenAICompletionRequestWithExtraBody", + "OpenAICompletionWithInputMessages", + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "OpenAICreateVectorStoreRequestWithExtraBody", + "OpenAIDeleteResponseObject", + 
"OpenAIDeveloperMessageParam", + "OpenAIEmbeddingData", + "OpenAIEmbeddingUsage", + "OpenAIEmbeddingsRequestWithExtraBody", + "OpenAIEmbeddingsResponse", + "OpenAIFile", + "OpenAIFileDeleteResponse", + "OpenAIFileFile", + "OpenAIFileObject", + "OpenAIFilePurpose", + "OpenAIImageURL", + "OpenAIJSONSchema", + "OpenAIListModelsResponse", + "OpenAIMessageParam", + "OpenAIModel", + "Order", + "OpenAIResponseAnnotationCitation", + "OpenAIResponseAnnotationContainerFileCitation", + "OpenAIResponseAnnotationFileCitation", + "OpenAIResponseAnnotationFilePath", + "OpenAIResponseAnnotations", + "OpenAIResponseContentPart", + "OpenAIResponseContentPartOutputText", + "OpenAIResponseContentPartReasoningSummary", + "OpenAIResponseContentPartReasoningText", + "OpenAIResponseContentPartRefusal", + "OpenAIResponseError", + "OpenAIResponseFormatJSONObject", + "OpenAIResponseFormatJSONSchema", + "OpenAIResponseFormatParam", + "OpenAIResponseFormatText", + "OpenAIResponseInput", + "OpenAIResponseInputFunctionToolCallOutput", + "OpenAIResponseInputMessageContent", + "OpenAIResponseInputMessageContentFile", + "OpenAIResponseInputMessageContentImage", + "OpenAIResponseInputMessageContentText", + "OpenAIResponseInputTool", + "OpenAIResponseInputToolFileSearch", + "OpenAIResponseInputToolFunction", + "OpenAIResponseInputToolMCP", + "OpenAIResponseInputToolWebSearch", + "OpenAIResponseMCPApprovalRequest", + "OpenAIResponseMCPApprovalResponse", + "OpenAIResponseMessage", + "OpenAIResponseObject", + "OpenAIResponseObjectStream", + "OpenAIResponseObjectStreamResponseCompleted", + "OpenAIResponseObjectStreamResponseContentPartAdded", + "OpenAIResponseObjectStreamResponseContentPartDone", + "OpenAIResponseObjectStreamResponseCreated", + "OpenAIResponseObjectStreamResponseFailed", + "OpenAIResponseObjectStreamResponseFileSearchCallCompleted", + "OpenAIResponseObjectStreamResponseFileSearchCallInProgress", + "OpenAIResponseObjectStreamResponseFileSearchCallSearching", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone", + "OpenAIResponseObjectStreamResponseInProgress", + "OpenAIResponseObjectStreamResponseIncomplete", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone", + "OpenAIResponseObjectStreamResponseMcpCallCompleted", + "OpenAIResponseObjectStreamResponseMcpCallFailed", + "OpenAIResponseObjectStreamResponseMcpCallInProgress", + "OpenAIResponseObjectStreamResponseMcpListToolsCompleted", + "OpenAIResponseObjectStreamResponseMcpListToolsFailed", + "OpenAIResponseObjectStreamResponseMcpListToolsInProgress", + "OpenAIResponseObjectStreamResponseOutputItemAdded", + "OpenAIResponseObjectStreamResponseOutputItemDone", + "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded", + "OpenAIResponseObjectStreamResponseOutputTextDelta", + "OpenAIResponseObjectStreamResponseOutputTextDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone", + "OpenAIResponseObjectStreamResponseReasoningTextDelta", + "OpenAIResponseObjectStreamResponseReasoningTextDone", + "OpenAIResponseObjectStreamResponseRefusalDelta", + "OpenAIResponseObjectStreamResponseRefusalDone", + "OpenAIResponseObjectStreamResponseWebSearchCallCompleted", + "OpenAIResponseObjectStreamResponseWebSearchCallInProgress", + 
"OpenAIResponseObjectStreamResponseWebSearchCallSearching", + "OpenAIResponseObjectWithInput", + "OpenAIResponseOutput", + "OpenAIResponseOutputMessageContent", + "OpenAIResponseOutputMessageContentOutputText", + "OpenAIResponseOutputMessageFileSearchToolCall", + "OpenAIResponseOutputMessageFileSearchToolCallResults", + "OpenAIResponseOutputMessageFunctionToolCall", + "OpenAIResponseOutputMessageMCPCall", + "OpenAIResponseOutputMessageMCPListTools", + "OpenAIResponseOutputMessageWebSearchToolCall", + "OpenAIResponsePrompt", + "OpenAIResponseText", + "OpenAIResponseTextFormat", + "OpenAIResponseTool", + "OpenAIResponseToolMCP", + "OpenAIResponseUsage", + "OpenAIResponseUsageInputTokensDetails", + "OpenAIResponseUsageOutputTokensDetails", + "OpenAISystemMessageParam", + "OpenAITokenLogProb", + "OpenAIToolMessageParam", + "OpenAITopLogProb", + "OpenAIUserMessageParam", + "OptimizerConfig", + "OptimizerType", + "PaginatedResponse", + "ParamType", + "parse_type", + "PostTraining", + "PostTrainingMetric", + "PostTrainingJob", + "PostTrainingJobArtifactsResponse", + "PostTrainingJobLogStream", + "PostTrainingJobStatusResponse", + "PostTrainingRLHFRequest", + "Prompt", + "Prompts", + "ProviderInfo", + "ProviderSpec", + "Providers", + "python_type_to_name", + "QATFinetuningConfig", + "QuantizationConfig", + "QuantizationType", + "QueryChunksResponse", + "RAGDocument", + "RAGQueryConfig", + "RAGQueryGenerator", + "RAGQueryGeneratorConfig", + "RAGQueryResult", + "RAGSearchMode", + "register_schema", + "RLHFAlgorithm", + "RRFRanker", + "Ranker", + "RegexParserScoringFnParams", + "RemoteProviderConfig", + "RemoteProviderSpec", + "RerankData", + "RerankResponse", + "Resource", + "ResourceNotFoundError", + "ResourceType", + "ResponseFormat", + "ResponseFormatType", + "ResponseGuardrail", + "ResponseGuardrailSpec", + "RouteInfo", + "RoutingTable", + "RowsDataSource", + "RunShieldResponse", + "Safety", + "SafetyViolation", + "SamplingParams", + "SamplingStrategy", + "ScoreBatchResponse", + "ScoreResponse", + "Scoring", + "ScoringFn", + "ScoringFnInput", + "ScoringFnParams", + "ScoringFnParamsType", + "ScoringFunctionStore", + "ScoringFunctions", + "ScoringFunctionsProtocolPrivate", + "ScoringResult", + "ScoringResultRow", + "Schema", + "SchemaOptions", + "SearchRankingOptions", + "Shield", + "ShieldInput", + "ShieldStore", + "Shields", + "ShieldsProtocolPrivate", + "SpecialToolGroup", + "StrictJsonType", + "StringType", + "SystemMessage", + "SystemMessageBehavior", + "TextContentItem", + "TextTruncation", + "TokenLogProbs", + "TokenValidationError", + "ToolChoice", + "ToolGroupNotFoundError", + "ToolDef", + "ToolGroup", + "ToolGroupInput", + "ToolGroups", + "ToolGroupsProtocolPrivate", + "ToolInvocationResult", + "ToolResponseMessage", + "ToolRuntime", + "ToolStore", + "TopKSamplingStrategy", + "TopPSamplingStrategy", + "TrainingConfig", + "UnsupportedModelError", + "unwrap_generic_list", + "unwrap_optional_type", + "unwrap_union_types", + "URIDataSource", + "URL", + "_URLOrData", + "UserMessage", + "VectorIO", + "VectorStore", + "VectorStoreChunkingStrategy", + "VectorStoreChunkingStrategyAuto", + "VectorStoreChunkingStrategyStatic", + "VectorStoreChunkingStrategyStaticConfig", + "VectorStoreContent", + "VectorStoreCreateRequest", + "VectorStoreDeleteResponse", + "VectorStoreFileBatchObject", + "VectorStoreFileContentResponse", + "VectorStoreFileCounts", + "VectorStoreFileDeleteResponse", + "VectorStoreFileLastError", + "VectorStoreFileObject", + "VectorStoreFileStatus", + 
"VectorStoreFilesListInBatchResponse", + "VectorStoreInput", + "VectorStoreListFilesResponse", + "VectorStoreListResponse", + "VectorStoreModifyRequest", + "VectorStoreObject", + "VectorStoreSearchRequest", + "VectorStoreSearchResponse", + "VectorStoreSearchResponsePage", + "VectorStoreTable", + "VectorStoreNotFoundError", + "VectorStoresProtocolPrivate", + "VersionInfo", + "ViolationLevel", + "webmethod", + "WebMethod", + "WebSearchToolTypes", + "WeightedRanker", +] diff --git a/src/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py index 1ad89f79a..50fe394fc 100644 --- a/src/llama_stack/cli/stack/_list_deps.py +++ b/src/llama_stack/cli/stack/_list_deps.py @@ -9,7 +9,7 @@ import sys from pathlib import Path import yaml -from llama_stack_api.datatypes import Api +from llama_stack_api import Api from termcolor import cprint from llama_stack.cli.stack.utils import ImageType diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py index ad3f2aa04..0a4e22b09 100644 --- a/src/llama_stack/cli/stack/utils.py +++ b/src/llama_stack/cli/stack/utils.py @@ -11,7 +11,7 @@ from functools import lru_cache from pathlib import Path import yaml -from llama_stack_api.datatypes import Api +from llama_stack_api import Api from termcolor import cprint from llama_stack.core.datatypes import ( diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index 473de5b0d..27ded7ede 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -6,7 +6,7 @@ import sys -from llama_stack_api.datatypes import Api +from llama_stack_api import Api from pydantic import BaseModel from termcolor import cprint diff --git a/src/llama_stack/core/client.py b/src/llama_stack/core/client.py index c3f707b4f..41acacdb5 100644 --- a/src/llama_stack/core/client.py +++ b/src/llama_stack/core/client.py @@ -12,7 +12,7 @@ from enum import Enum from typing import Any, Union, get_args, get_origin import httpx -from llama_stack_api.datatypes import RemoteProviderConfig +from llama_stack_api import RemoteProviderConfig from pydantic import BaseModel, parse_obj_as from termcolor import cprint diff --git a/src/llama_stack/core/configure.py b/src/llama_stack/core/configure.py index 9da2230b0..bdb3b9734 100644 --- a/src/llama_stack/core/configure.py +++ b/src/llama_stack/core/configure.py @@ -6,7 +6,7 @@ import textwrap from typing import Any -from llama_stack_api.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py index 5946234de..f7a49fa08 100644 --- a/src/llama_stack/core/conversations/conversations.py +++ b/src/llama_stack/core/conversations/conversations.py @@ -8,7 +8,7 @@ import secrets import time from typing import Any, Literal -from llama_stack_api.conversations import ( +from llama_stack_api import ( Conversation, ConversationDeletedResource, ConversationItem, diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 13b5689f1..4231363b6 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -9,21 +9,32 @@ from pathlib import Path from typing import Annotated, Any, Literal, Self from urllib.parse import urlparse -from llama_stack_api.benchmarks import Benchmark, BenchmarkInput -from llama_stack_api.datasetio import DatasetIO -from llama_stack_api.datasets import 
Dataset, DatasetInput -from llama_stack_api.datatypes import Api, ProviderSpec -from llama_stack_api.eval import Eval -from llama_stack_api.inference import Inference -from llama_stack_api.models import Model, ModelInput -from llama_stack_api.resource import Resource -from llama_stack_api.safety import Safety -from llama_stack_api.scoring import Scoring -from llama_stack_api.scoring_functions import ScoringFn, ScoringFnInput -from llama_stack_api.shields import Shield, ShieldInput -from llama_stack_api.tools import ToolGroup, ToolGroupInput, ToolRuntime -from llama_stack_api.vector_io import VectorIO -from llama_stack_api.vector_stores import VectorStore, VectorStoreInput +from llama_stack_api import ( + Api, + Benchmark, + BenchmarkInput, + Dataset, + DatasetInput, + DatasetIO, + Eval, + Inference, + Model, + ModelInput, + ProviderSpec, + Resource, + Safety, + Scoring, + ScoringFn, + ScoringFnInput, + Shield, + ShieldInput, + ToolGroup, + ToolGroupInput, + ToolRuntime, + VectorIO, + VectorStore, + VectorStoreInput, +) from pydantic import BaseModel, Field, field_validator, model_validator from llama_stack.core.access_control.datatypes import AccessRule diff --git a/src/llama_stack/core/distribution.py b/src/llama_stack/core/distribution.py index 82cb05851..162f9f2b0 100644 --- a/src/llama_stack/core/distribution.py +++ b/src/llama_stack/core/distribution.py @@ -10,7 +10,7 @@ import os from typing import Any import yaml -from llama_stack_api.datatypes import ( +from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, diff --git a/src/llama_stack/core/external.py b/src/llama_stack/core/external.py index 42ddf1f82..ce0c7eb72 100644 --- a/src/llama_stack/core/external.py +++ b/src/llama_stack/core/external.py @@ -6,7 +6,7 @@ import yaml -from llama_stack_api.datatypes import Api, ExternalApiSpec +from llama_stack_api import Api, ExternalApiSpec from llama_stack.core.datatypes import BuildConfig, StackRunConfig from llama_stack.log import get_logger diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py index 7ffba9101..53ddd3475 100644 --- a/src/llama_stack/core/inspect.py +++ b/src/llama_stack/core/inspect.py @@ -6,9 +6,9 @@ from importlib.metadata import version -from llama_stack_api.datatypes import HealthStatus -from llama_stack_api.inspect import ( +from llama_stack_api import ( HealthInfo, + HealthStatus, Inspect, ListRoutesResponse, RouteInfo, diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index 68dd63a33..959284720 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -18,7 +18,7 @@ from typing import Any, TypeVar, Union, get_args, get_origin import httpx import yaml from fastapi import Response as FastAPIResponse -from llama_stack_api.strong_typing.inspection import is_unwrapped_body_param +from llama_stack_api import is_unwrapped_body_param try: from llama_stack_client import ( diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py index e5e1904dd..d9532b978 100644 --- a/src/llama_stack/core/prompts/prompts.py +++ b/src/llama_stack/core/prompts/prompts.py @@ -7,7 +7,7 @@ import json from typing import Any -from llama_stack_api.prompts import ListPromptsResponse, Prompt, Prompts +from llama_stack_api import ListPromptsResponse, Prompt, Prompts from pydantic import BaseModel from llama_stack.core.datatypes import StackRunConfig diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py index 
bf918cd4f..7337d9e35 100644 --- a/src/llama_stack/core/providers.py +++ b/src/llama_stack/core/providers.py @@ -7,8 +7,7 @@ import asyncio from typing import Any -from llama_stack_api.datatypes import HealthResponse, HealthStatus -from llama_stack_api.providers import ListProvidersResponse, ProviderInfo, Providers +from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers from pydantic import BaseModel from llama_stack.log import get_logger diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py index 2e15aaed2..ca154fbc6 100644 --- a/src/llama_stack/core/resolver.py +++ b/src/llama_stack/core/resolver.py @@ -8,41 +8,45 @@ import importlib.metadata import inspect from typing import Any -from llama_stack_api.agents import Agents -from llama_stack_api.batches import Batches -from llama_stack_api.benchmarks import Benchmarks -from llama_stack_api.conversations import Conversations -from llama_stack_api.datasetio import DatasetIO -from llama_stack_api.datasets import Datasets -from llama_stack_api.datatypes import ( +from llama_stack_api import ( + LLAMA_STACK_API_V1ALPHA, + Agents, Api, + Batches, + Benchmarks, BenchmarksProtocolPrivate, + Conversations, + DatasetIO, + Datasets, DatasetsProtocolPrivate, + Eval, ExternalApiSpec, + Files, + Inference, + InferenceProvider, + Inspect, + Models, ModelsProtocolPrivate, + PostTraining, + Prompts, ProviderSpec, RemoteProviderConfig, RemoteProviderSpec, + Safety, + Scoring, + ScoringFunctions, ScoringFunctionsProtocolPrivate, + Shields, ShieldsProtocolPrivate, + ToolGroups, ToolGroupsProtocolPrivate, + ToolRuntime, + VectorIO, + VectorStore, +) +from llama_stack_api import ( + Providers as ProvidersAPI, ) -from llama_stack_api.eval import Eval -from llama_stack_api.files import Files -from llama_stack_api.inference import Inference, InferenceProvider -from llama_stack_api.inspect import Inspect -from llama_stack_api.models import Models -from llama_stack_api.post_training import PostTraining -from llama_stack_api.prompts import Prompts -from llama_stack_api.providers import Providers as ProvidersAPI -from llama_stack_api.safety import Safety -from llama_stack_api.scoring import Scoring -from llama_stack_api.scoring_functions import ScoringFunctions -from llama_stack_api.shields import Shields -from llama_stack_api.tools import ToolGroups, ToolRuntime -from llama_stack_api.vector_io import VectorIO -from llama_stack_api.vector_stores import VectorStore -from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA from llama_stack.core.client import get_client_impl from llama_stack.core.datatypes import ( diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py index dae0ba7b1..c2d051422 100644 --- a/src/llama_stack/core/routers/__init__.py +++ b/src/llama_stack/core/routers/__init__.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.datatypes import Api, RoutingTable +from llama_stack_api import Api, RoutingTable from llama_stack.core.datatypes import ( AccessRule, diff --git a/src/llama_stack/core/routers/datasets.py b/src/llama_stack/core/routers/datasets.py index 66bdefddf..dcf247874 100644 --- a/src/llama_stack/core/routers/datasets.py +++ b/src/llama_stack/core/routers/datasets.py @@ -6,10 +6,7 @@ from typing import Any -from llama_stack_api.common.responses import PaginatedResponse -from llama_stack_api.datasetio import DatasetIO -from llama_stack_api.datasets import DatasetPurpose, DataSource -from llama_stack_api.datatypes 
import RoutingTable +from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable from llama_stack.log import get_logger diff --git a/src/llama_stack/core/routers/eval_scoring.py b/src/llama_stack/core/routers/eval_scoring.py index 0e705f556..cbbbf5cc5 100644 --- a/src/llama_stack/core/routers/eval_scoring.py +++ b/src/llama_stack/core/routers/eval_scoring.py @@ -6,9 +6,12 @@ from typing import Any -from llama_stack_api.datatypes import RoutingTable -from llama_stack_api.eval import BenchmarkConfig, Eval, EvaluateResponse, Job -from llama_stack_api.scoring import ( +from llama_stack_api import ( + BenchmarkConfig, + Eval, + EvaluateResponse, + Job, + RoutingTable, ScoreBatchResponse, ScoreResponse, Scoring, diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py index 498ab29b1..a538ab02e 100644 --- a/src/llama_stack/core/routers/inference.py +++ b/src/llama_stack/core/routers/inference.py @@ -11,11 +11,14 @@ from datetime import UTC, datetime from typing import Annotated, Any from fastapi import Body -from llama_stack_api.common.errors import ModelNotFoundError, ModelTypeError -from llama_stack_api.datatypes import HealthResponse, HealthStatus, RoutingTable -from llama_stack_api.inference import ( +from llama_stack_api import ( + HealthResponse, + HealthStatus, Inference, ListOpenAIChatCompletionResponse, + ModelNotFoundError, + ModelType, + ModelTypeError, OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChatCompletionChunk, @@ -34,8 +37,8 @@ from llama_stack_api.inference import ( OpenAIMessageParam, Order, RerankResponse, + RoutingTable, ) -from llama_stack_api.models import ModelType from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam from pydantic import TypeAdapter diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py index f31229e71..f85bbb767 100644 --- a/src/llama_stack/core/routers/safety.py +++ b/src/llama_stack/core/routers/safety.py @@ -6,10 +6,7 @@ from typing import Any -from llama_stack_api.datatypes import RoutingTable -from llama_stack_api.inference import OpenAIMessageParam -from llama_stack_api.safety import ModerationObject, RunShieldResponse, Safety -from llama_stack_api.shields import Shield +from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield from llama_stack.core.datatypes import SafetyConfig from llama_stack.log import get_logger diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py index e5db3f445..984a8e2a7 100644 --- a/src/llama_stack/core/routers/tool_runtime.py +++ b/src/llama_stack/core/routers/tool_runtime.py @@ -6,10 +6,8 @@ from typing import Any -from llama_stack_api.common.content_types import ( +from llama_stack_api import ( URL, -) -from llama_stack_api.tools import ( ListToolDefsResponse, ToolRuntime, ) diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index 6880e6322..bfd090e32 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -9,14 +9,16 @@ import uuid from typing import Annotated, Any from fastapi import Body -from llama_stack_api.common.content_types import InterleavedContent -from llama_stack_api.datatypes import HealthResponse, HealthStatus, RoutingTable -from 
diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py
index 6880e6322..bfd090e32 100644
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@@ -9,14 +9,16 @@
 import uuid
 from typing import Annotated, Any
 from fastapi import Body
-from llama_stack_api.common.content_types import InterleavedContent
-from llama_stack_api.datatypes import HealthResponse, HealthStatus, RoutingTable
-from llama_stack_api.models import ModelType
-from llama_stack_api.vector_io import (
+from llama_stack_api import (
     Chunk,
+    HealthResponse,
+    HealthStatus,
+    InterleavedContent,
+    ModelType,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     OpenAICreateVectorStoreRequestWithExtraBody,
     QueryChunksResponse,
+    RoutingTable,
     SearchRankingOptions,
     VectorIO,
     VectorStoreChunkingStrategy,
diff --git a/src/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py
index 76b5c0167..66830bc41 100644
--- a/src/llama_stack/core/routing_tables/benchmarks.py
+++ b/src/llama_stack/core/routing_tables/benchmarks.py
@@ -6,7 +6,7 @@
 from typing import Any
-from llama_stack_api.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
+from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
 from llama_stack.core.datatypes import (
     BenchmarkWithOwner,
diff --git a/src/llama_stack/core/routing_tables/common.py b/src/llama_stack/core/routing_tables/common.py
index 718fc5544..cfbafc9a8 100644
--- a/src/llama_stack/core/routing_tables/common.py
+++ b/src/llama_stack/core/routing_tables/common.py
@@ -6,10 +6,7 @@
 from typing import Any
-from llama_stack_api.common.errors import ModelNotFoundError
-from llama_stack_api.datatypes import Api, RoutingTable
-from llama_stack_api.models import Model
-from llama_stack_api.resource import ResourceType
+from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
 from llama_stack.core.access_control.datatypes import Action
diff --git a/src/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py
index a17c23d2b..c49c9769b 100644
--- a/src/llama_stack/core/routing_tables/datasets.py
+++ b/src/llama_stack/core/routing_tables/datasets.py
@@ -7,18 +7,18 @@
 import uuid
 from typing import Any
-from llama_stack_api.common.errors import DatasetNotFoundError
-from llama_stack_api.datasets import (
+from llama_stack_api import (
     Dataset,
+    DatasetNotFoundError,
     DatasetPurpose,
     Datasets,
     DatasetType,
     DataSource,
     ListDatasetsResponse,
+    ResourceType,
     RowsDataSource,
     URIDataSource,
 )
-from llama_stack_api.resource import ResourceType
 from llama_stack.core.datatypes import (
     DatasetWithOwner,
diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py
index d323a835d..e1210a139 100644
--- a/src/llama_stack/core/routing_tables/models.py
+++ b/src/llama_stack/core/routing_tables/models.py
@@ -7,10 +7,10 @@
 import time
 from typing import Any
-from llama_stack_api.common.errors import ModelNotFoundError
-from llama_stack_api.models import (
+from llama_stack_api import (
     ListModelsResponse,
     Model,
+    ModelNotFoundError,
     Models,
     ModelType,
     OpenAIListModelsResponse,
diff --git a/src/llama_stack/core/routing_tables/scoring_functions.py b/src/llama_stack/core/routing_tables/scoring_functions.py
index fcc0a8007..66165ac2f 100644
--- a/src/llama_stack/core/routing_tables/scoring_functions.py
+++ b/src/llama_stack/core/routing_tables/scoring_functions.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import ParamType
-from llama_stack_api.resource import ResourceType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     ListScoringFunctionsResponse,
+    ParamType,
+    ResourceType,
     ScoringFn,
     ScoringFnParams,
     ScoringFunctions,
diff --git a/src/llama_stack/core/routing_tables/shields.py b/src/llama_stack/core/routing_tables/shields.py
index 326ebca0d..0f981c49d 100644
--- a/src/llama_stack/core/routing_tables/shields.py
+++ b/src/llama_stack/core/routing_tables/shields.py
@@ -6,8 +6,7 @@
 from typing import Any
-from llama_stack_api.resource import ResourceType
-from llama_stack_api.shields import ListShieldsResponse, Shield, Shields
+from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
 from llama_stack.core.datatypes import (
     ShieldWithOwner,
diff --git a/src/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py
index c49f36f6e..a552cb96e 100644
--- a/src/llama_stack/core/routing_tables/toolgroups.py
+++ b/src/llama_stack/core/routing_tables/toolgroups.py
@@ -6,9 +6,15 @@
 from typing import Any
-from llama_stack_api.common.content_types import URL
-from llama_stack_api.common.errors import ToolGroupNotFoundError
-from llama_stack_api.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
+from llama_stack_api import (
+    URL,
+    ListToolDefsResponse,
+    ListToolGroupsResponse,
+    ToolDef,
+    ToolGroup,
+    ToolGroupNotFoundError,
+    ToolGroups,
+)
 from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py
index d9d3ce518..f95463b3c 100644
--- a/src/llama_stack/core/routing_tables/vector_stores.py
+++ b/src/llama_stack/core/routing_tables/vector_stores.py
@@ -6,12 +6,12 @@
 from typing import Any
-from llama_stack_api.common.errors import ModelNotFoundError, ModelTypeError
-from llama_stack_api.models import ModelType
-from llama_stack_api.resource import ResourceType
-
 # Removed VectorStores import to avoid exposing public API
-from llama_stack_api.vector_io import (
+from llama_stack_api import (
+    ModelNotFoundError,
+    ModelType,
+    ModelTypeError,
+    ResourceType,
     SearchRankingOptions,
     VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
diff --git a/src/llama_stack/core/server/auth_providers.py b/src/llama_stack/core/server/auth_providers.py
index a0a4c4b12..a7f5d7916 100644
--- a/src/llama_stack/core/server/auth_providers.py
+++ b/src/llama_stack/core/server/auth_providers.py
@@ -11,7 +11,7 @@
 from urllib.parse import parse_qs, urljoin, urlparse
 import httpx
 import jwt
-from llama_stack_api.common.errors import TokenValidationError
+from llama_stack_api import TokenValidationError
 from pydantic import BaseModel, Field
 from llama_stack.core.datatypes import (
diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py
index 34319dcfa..e7a84937d 100644
--- a/src/llama_stack/core/server/routes.py
+++ b/src/llama_stack/core/server/routes.py
@@ -10,8 +10,7 @@
 from collections.abc import Callable
 from typing import Any
 from aiohttp import hdrs
-from llama_stack_api.datatypes import Api, ExternalApiSpec
-from llama_stack_api.schema_utils import WebMethod
+from llama_stack_api import Api, ExternalApiSpec, WebMethod
 from starlette.routing import Route
 from llama_stack.core.resolver import api_protocol_map
diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py
index 6a6c7b2c9..821f21831 100644
--- a/src/llama_stack/core/server/server.py
+++ b/src/llama_stack/core/server/server.py
@@ -28,9 +28,7 @@
 from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
-from llama_stack_api.common.errors import ConflictError, ResourceNotFoundError
-from llama_stack_api.common.responses import PaginatedResponse
-from llama_stack_api.datatypes import Api
+from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py
index 983249884..674c35f31 100644
--- a/src/llama_stack/core/stack.py
+++ b/src/llama_stack/core/stack.py
@@ -12,27 +12,30 @@
 import tempfile
 from typing import Any
 import yaml
-from llama_stack_api.agents import Agents
-from llama_stack_api.batches import Batches
-from llama_stack_api.benchmarks import Benchmarks
-from llama_stack_api.conversations import Conversations
-from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.datasets import Datasets
-from llama_stack_api.datatypes import Api
-from llama_stack_api.eval import Eval
-from llama_stack_api.files import Files
-from llama_stack_api.inference import Inference
-from llama_stack_api.inspect import Inspect
-from llama_stack_api.models import Models
-from llama_stack_api.post_training import PostTraining
-from llama_stack_api.prompts import Prompts
-from llama_stack_api.providers import Providers
-from llama_stack_api.safety import Safety
-from llama_stack_api.scoring import Scoring
-from llama_stack_api.scoring_functions import ScoringFunctions
-from llama_stack_api.shields import Shields
-from llama_stack_api.tools import ToolGroups, ToolRuntime
-from llama_stack_api.vector_io import VectorIO
+from llama_stack_api import (
+    Agents,
+    Api,
+    Batches,
+    Benchmarks,
+    Conversations,
+    DatasetIO,
+    Datasets,
+    Eval,
+    Files,
+    Inference,
+    Inspect,
+    Models,
+    PostTraining,
+    Prompts,
+    Providers,
+    Safety,
+    Scoring,
+    ScoringFunctions,
+    Shields,
+    ToolGroups,
+    ToolRuntime,
+    VectorIO,
+)
 from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
 from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
diff --git a/src/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py
index 90a0a8b98..1a56277ea 100644
--- a/src/llama_stack/core/telemetry/telemetry.py
+++ b/src/llama_stack/core/telemetry/telemetry.py
@@ -16,7 +16,7 @@
 from typing import (
     cast,
 )
-from llama_stack_api.schema_utils import json_schema_type, register_schema
+from llama_stack_api import json_schema_type, register_schema
 from opentelemetry import metrics, trace
 from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
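The stack.py hunk above collapses twenty-one submodule imports into a single top-level block. That only resolves if the package root re-exports every name; a hypothetical sketch of what such a re-exporting __init__.py looks like (illustrative subset only, not the actual file):

    # llama_stack_api/__init__.py -- illustrative sketch only
    from .agents import Agents
    from .batches import Batches
    from .datatypes import Api

    # An explicit __all__ marks these as the public surface; anything
    # not listed stays internal even though it is technically importable.
    __all__ = ["Agents", "Api", "Batches"]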
diff --git a/src/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py
index f5340b2e2..fd76e3ccb 100644
--- a/src/llama_stack/distributions/dell/dell.py
+++ b/src/llama_stack/distributions/dell/dell.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.models import ModelType
+from llama_stack_api import ModelType
 from llama_stack.core.datatypes import (
     BuildProvider,
diff --git a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
index 53ee71a7e..67af0e92a 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
+++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
@@ -6,7 +6,7 @@
 from pathlib import Path
-from llama_stack_api.models import ModelType
+from llama_stack_api import ModelType
 from llama_stack.core.datatypes import (
     BuildProvider,
diff --git a/src/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
index 52c9c73ed..59deca6d0 100644
--- a/src/llama_stack/distributions/open-benchmark/open_benchmark.py
+++ b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
@@ -5,8 +5,7 @@
 # the root directory of this source tree.
-from llama_stack_api.datasets import DatasetPurpose, URIDataSource
-from llama_stack_api.models import ModelType
+from llama_stack_api import DatasetPurpose, ModelType, URIDataSource
 from llama_stack.core.datatypes import (
     BenchmarkInput,
diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py
index 6bd4f04be..1a8126290 100644
--- a/src/llama_stack/distributions/starter/starter.py
+++ b/src/llama_stack/distributions/starter/starter.py
@@ -7,7 +7,7 @@
 from typing import Any
-from llama_stack_api.datatypes import RemoteProviderSpec
+from llama_stack_api import RemoteProviderSpec
 from llama_stack.core.datatypes import (
     BuildProvider,
diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py
index 3ef116821..faf5fb085 100644
--- a/src/llama_stack/distributions/template.py
+++ b/src/llama_stack/distributions/template.py
@@ -10,8 +10,7 @@
 from typing import Any, Literal
 import jinja2
 import rich
 import yaml
-from llama_stack_api.datasets import DatasetPurpose
-from llama_stack_api.models import ModelType
+from llama_stack_api import DatasetPurpose, ModelType
 from pydantic import BaseModel, Field
 from llama_stack.core.datatypes import (
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
index 4917ccca5..025fcc676 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -5,25 +5,25 @@
-from llama_stack_api.agents import (
+from llama_stack_api import (
     Agents,
+    Conversations,
+    Inference,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
     OpenAIDeleteResponseObject,
     OpenAIResponseInput,
     OpenAIResponseInputTool,
     OpenAIResponseObject,
+    OpenAIResponsePrompt,
+    OpenAIResponseText,
     Order,
     ResponseGuardrail,
+    Safety,
+    ToolGroups,
+    ToolRuntime,
+    VectorIO,
 )
-from llama_stack_api.conversations import Conversations
-from llama_stack_api.inference import (
-    Inference,
-)
-from llama_stack_api.openai_responses import OpenAIResponsePrompt, OpenAIResponseText
-from llama_stack_api.safety import Safety
-from llama_stack_api.tools import ToolGroups, ToolRuntime
-from llama_stack_api.vector_io import VectorIO
 from llama_stack.core.datatypes import AccessRule
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index b2d604247..347eeef78 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -8,20 +8,15 @@
 import time
 import uuid
 from collections.abc import AsyncIterator
-from llama_stack_api.agents import Order, ResponseGuardrailSpec
-from llama_stack_api.common.errors import (
-    InvalidConversationIdError,
-)
-from llama_stack_api.conversations import ConversationItem, Conversations
-from llama_stack_api.inference import (
+from llama_stack_api import (
+    ConversationItem,
+    Conversations,
     Inference,
-    OpenAIMessageParam,
-    OpenAISystemMessageParam,
-)
-from llama_stack_api.openai_responses import (
+    InvalidConversationIdError,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
     OpenAIDeleteResponseObject,
+    OpenAIMessageParam,
     OpenAIResponseInput,
     OpenAIResponseInputMessageContentText,
     OpenAIResponseInputTool,
@@ -31,10 +26,14 @@ from llama_stack_api.openai_responses import (
     OpenAIResponsePrompt,
     OpenAIResponseText,
     OpenAIResponseTextFormat,
+    OpenAISystemMessageParam,
+    Order,
+    ResponseGuardrailSpec,
+    Safety,
+    ToolGroups,
+    ToolRuntime,
+    VectorIO,
 )
-from llama_stack_api.safety import Safety
-from llama_stack_api.tools import ToolGroups, ToolRuntime
-from llama_stack_api.vector_io import VectorIO
 from pydantic import BaseModel, TypeAdapter
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 2f36f14a8..6a791e92d 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -8,8 +8,11 @@
 import uuid
 from collections.abc import AsyncIterator
 from typing import Any
-from llama_stack_api.inference import (
+from llama_stack_api import (
+    AllowedToolsFilter,
+    ApprovalFilter,
     Inference,
+    MCPListToolsTool,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
@@ -17,11 +20,6 @@
     OpenAIChatCompletionToolCall,
     OpenAIChoice,
     OpenAIMessageParam,
-)
-from llama_stack_api.openai_responses import (
-    AllowedToolsFilter,
-    ApprovalFilter,
-    MCPListToolsTool,
     OpenAIResponseContentPartOutputText,
     OpenAIResponseContentPartReasoningText,
     OpenAIResponseContentPartRefusal,
@@ -1024,7 +1022,7 @@ class StreamingResponseOrchestrator:
         self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput]
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         """Process all tools and emit appropriate streaming events."""
-        from llama_stack_api.tools import ToolDef
+        from llama_stack_api import ToolDef
         from openai.types.chat import ChatCompletionToolParam
         from llama_stack.models.llama.datatypes import ToolDefinition
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
index 41be0969c..38fb2a94f 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
@@ -9,18 +9,12 @@
 import json
 from collections.abc import AsyncIterator
 from typing import Any
-from llama_stack_api.common.content_types import (
+from llama_stack_api import (
     ImageContentItem,
-    TextContentItem,
-)
-from llama_stack_api.inference import (
     OpenAIChatCompletionContentPartImageParam,
     OpenAIChatCompletionContentPartTextParam,
     OpenAIChatCompletionToolCall,
     OpenAIImageURL,
-    OpenAIToolMessageParam,
-)
-from llama_stack_api.openai_responses import (
     OpenAIResponseInputToolFileSearch,
     OpenAIResponseInputToolMCP,
     OpenAIResponseObjectStreamResponseFileSearchCallCompleted,
@@ -35,9 +29,13 @@
     OpenAIResponseOutputMessageFileSearchToolCall,
     OpenAIResponseOutputMessageFileSearchToolCallResults,
     OpenAIResponseOutputMessageWebSearchToolCall,
+    OpenAIToolMessageParam,
+    TextContentItem,
+    ToolGroups,
+    ToolInvocationResult,
+    ToolRuntime,
+    VectorIO,
 )
-from llama_stack_api.tools import ToolGroups, ToolInvocationResult, ToolRuntime
-from llama_stack_api.vector_io import VectorIO
 from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
@@ -398,7 +396,7 @@ class ToolExecutor:
         # Build output message
         message: Any
         if mcp_tool_to_server and function.name in mcp_tool_to_server:
-            from llama_stack_api.openai_responses import (
+            from llama_stack_api import (
                 OpenAIResponseOutputMessageMCPCall,
             )
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
index 8fa8cdb9d..35ad03378 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
@@ -7,8 +7,10 @@
 from dataclasses import dataclass
 from typing import cast
-from llama_stack_api.inference import OpenAIChatCompletionToolCall, OpenAIMessageParam, OpenAIResponseFormatParam
-from llama_stack_api.openai_responses import (
+from llama_stack_api import (
+    OpenAIChatCompletionToolCall,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     OpenAIResponseInput,
     OpenAIResponseInputTool,
     OpenAIResponseInputToolFileSearch,
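The streaming.py and tool_executor.py hunks above also convert imports that live inside function bodies, so deferred imports follow the same rule as module-level ones. A hypothetical sketch of the pattern:

    def build_tool_defs():
        # Hypothetical function; the deferred import mirrors the hunks above.
        # Importing inside the body delays the cost (or breaks an import
        # cycle), but still goes through the public package root.
        from llama_stack_api import ToolDef

        return []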
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
index 3ccf489f2..943bbae41 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
@@ -9,8 +9,7 @@
 import re
 import uuid
 from collections.abc import Sequence
-from llama_stack_api.agents import ResponseGuardrailSpec
-from llama_stack_api.inference import (
+from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletionContentPartImageParam,
     OpenAIChatCompletionContentPartParam,
@@ -22,16 +21,11 @@ from llama_stack_api.inference import (
     OpenAIImageURL,
     OpenAIJSONSchema,
     OpenAIMessageParam,
+    OpenAIResponseAnnotationFileCitation,
     OpenAIResponseFormatJSONObject,
     OpenAIResponseFormatJSONSchema,
     OpenAIResponseFormatParam,
     OpenAIResponseFormatText,
-    OpenAISystemMessageParam,
-    OpenAIToolMessageParam,
-    OpenAIUserMessageParam,
-)
-from llama_stack_api.openai_responses import (
-    OpenAIResponseAnnotationFileCitation,
     OpenAIResponseInput,
     OpenAIResponseInputFunctionToolCallOutput,
     OpenAIResponseInputMessageContent,
@@ -47,8 +41,12 @@
     OpenAIResponseOutputMessageMCPCall,
     OpenAIResponseOutputMessageMCPListTools,
     OpenAIResponseText,
+    OpenAISystemMessageParam,
+    OpenAIToolMessageParam,
+    OpenAIUserMessageParam,
+    ResponseGuardrailSpec,
+    Safety,
 )
-from llama_stack_api.safety import Safety
 async def convert_chat_choice_to_response_message(
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
index 78cea0864..dd90ac298 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/safety.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
@@ -6,8 +6,7 @@
 import asyncio
-from llama_stack_api.inference import OpenAIMessageParam
-from llama_stack_api.safety import Safety, SafetyViolation, ViolationLevel
+from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
 from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/inline/batches/reference/__init__.py b/src/llama_stack/providers/inline/batches/reference/__init__.py
index 8b905a9e8..27d0f4213 100644
--- a/src/llama_stack/providers/inline/batches/reference/__init__.py
+++ b/src/llama_stack/providers/inline/batches/reference/__init__.py
@@ -6,9 +6,7 @@
 from typing import Any
-from llama_stack_api.files import Files
-from llama_stack_api.inference import Inference
-from llama_stack_api.models import Models
+from llama_stack_api import Files, Inference, Models
 from llama_stack.core.datatypes import AccessRule, Api
 from llama_stack.providers.utils.kvstore import kvstore_impl
diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py
index 109643d78..f0f8da96c 100644
--- a/src/llama_stack/providers/inline/batches/reference/batches.py
+++ b/src/llama_stack/providers/inline/batches/reference/batches.py
@@ -13,22 +13,26 @@
 import uuid
 from io import BytesIO
 from typing import Any, Literal
-from llama_stack_api.batches import Batches, BatchObject, ListBatchesResponse
-from llama_stack_api.common.errors import ConflictError, ResourceNotFoundError
-from llama_stack_api.files import Files, OpenAIFilePurpose
-from llama_stack_api.inference import (
+from llama_stack_api import (
+    Batches,
+    BatchObject,
+    ConflictError,
+    Files,
     Inference,
+    ListBatchesResponse,
+    Models,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletionRequestWithExtraBody,
     OpenAIDeveloperMessageParam,
     OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIFilePurpose,
     OpenAIMessageParam,
     OpenAISystemMessageParam,
     OpenAIToolMessageParam,
     OpenAIUserMessageParam,
+    ResourceNotFoundError,
 )
-from llama_stack_api.models import Models
 from openai.types.batch import BatchError, Errors
 from pydantic import BaseModel
diff --git a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
index d18b5a449..1fcfbbef4 100644
--- a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
+++ b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
@@ -5,10 +5,7 @@
 # the root directory of this source tree.
 from typing import Any
-from llama_stack_api.common.responses import PaginatedResponse
-from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.datasets import Dataset
-from llama_stack_api.datatypes import DatasetsProtocolPrivate
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
 from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri
 from llama_stack.providers.utils.kvstore import kvstore_impl
diff --git a/src/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
index 3c78a1a08..e6020e8a3 100644
--- a/src/llama_stack/providers/inline/eval/meta_reference/eval.py
+++ b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
@@ -6,21 +6,24 @@
 import json
 from typing import Any
-from llama_stack_api.agents import Agents
-from llama_stack_api.benchmarks import Benchmark
-from llama_stack_api.common.job_types import Job, JobStatus
-from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.datasets import Datasets
-from llama_stack_api.datatypes import BenchmarksProtocolPrivate
-from llama_stack_api.eval import BenchmarkConfig, Eval, EvaluateResponse
-from llama_stack_api.inference import (
+from llama_stack_api import (
+    Agents,
+    Benchmark,
+    BenchmarkConfig,
+    BenchmarksProtocolPrivate,
+    DatasetIO,
+    Datasets,
+    Eval,
+    EvaluateResponse,
     Inference,
+    Job,
+    JobStatus,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletionRequestWithExtraBody,
     OpenAISystemMessageParam,
     OpenAIUserMessageParam,
+    Scoring,
 )
-from llama_stack_api.scoring import Scoring
 from tqdm import tqdm
 from llama_stack.providers.utils.common.data_schema_validator import ColumnName
diff --git a/src/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py
index 0e34cd64a..5e8c887f1 100644
--- a/src/llama_stack/providers/inline/files/localfs/files.py
+++ b/src/llama_stack/providers/inline/files/localfs/files.py
@@ -10,15 +10,15 @@
 from pathlib import Path
 from typing import Annotated
 from fastapi import Depends, File, Form, Response, UploadFile
-from llama_stack_api.common.errors import ResourceNotFoundError
-from llama_stack_api.common.responses import Order
-from llama_stack_api.files import (
+from llama_stack_api import (
     ExpiresAfter,
     Files,
     ListOpenAIFileResponse,
     OpenAIFileDeleteResponse,
     OpenAIFileObject,
     OpenAIFilePurpose,
+    Order,
+    ResourceNotFoundError,
 )
 from llama_stack.core.datatypes import AccessRule
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py
index caae17fc0..802e79f15 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/config.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/config.py
@@ -6,7 +6,7 @@
 from typing import Any
-from llama_stack_api.inference import QuantizationConfig
+from llama_stack_api import QuantizationConfig
 from pydantic import BaseModel, field_validator
 from llama_stack.providers.utils.inference import supported_inference_models
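Note in the batches.py and files.py hunks above that error types such as ConflictError and ResourceNotFoundError move to the root import as well, so callers catch them without knowing they once lived in llama_stack_api.common.errors. An illustrative sketch:

    from llama_stack_api import ConflictError, ResourceNotFoundError

    def fetch_or_none(fn):
        # Hypothetical helper: API errors are caught via the same public names.
        try:
            return fn()
        except (ConflictError, ResourceNotFoundError):
            return None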
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
index 21e96c0a9..2155a1ae8 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/generators.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
@@ -8,7 +8,7 @@
 import math
 from typing import Optional
 import torch
-from llama_stack_api.inference import (
+from llama_stack_api import (
     GreedySamplingStrategy,
     JsonSchemaResponseFormat,
     OpenAIChatCompletionRequestWithExtraBody,
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
index 940992c0c..753185fe7 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -9,9 +9,11 @@
 import time
 import uuid
 from collections.abc import AsyncIterator
-from llama_stack_api.datatypes import ModelsProtocolPrivate
-from llama_stack_api.inference import (
+from llama_stack_api import (
     InferenceProvider,
+    Model,
+    ModelsProtocolPrivate,
+    ModelType,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
@@ -23,7 +25,6 @@
     OpenAIUserMessageParam,
     ToolChoice,
 )
-from llama_stack_api.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import RawMessage, RawTextItem, ToolDefinition
@@ -375,7 +376,7 @@ class MetaReferenceInferenceImpl(
         # Convert tool calls to OpenAI format
         openai_tool_calls = None
         if decoded_message.tool_calls:
-            from llama_stack_api.inference import (
+            from llama_stack_api import (
                 OpenAIChatCompletionToolCall,
                 OpenAIChatCompletionToolCallFunction,
             )
@@ -440,7 +441,7 @@ class MetaReferenceInferenceImpl(
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> AsyncIterator[OpenAIChatCompletionChunk]:
         """Stream chat completion chunks as they're generated."""
-        from llama_stack_api.inference import (
+        from llama_stack_api import (
             OpenAIChatCompletionChunk,
             OpenAIChatCompletionToolCall,
             OpenAIChatCompletionToolCallFunction,
diff --git a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
index 946849223..14c9a41a4 100644
--- a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
+++ b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
@@ -6,16 +6,17 @@
 from collections.abc import AsyncIterator
-from llama_stack_api.datatypes import ModelsProtocolPrivate
-from llama_stack_api.inference import (
+from llama_stack_api import (
     InferenceProvider,
+    Model,
+    ModelsProtocolPrivate,
+    ModelType,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
 )
-from llama_stack_api.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.embedding_mixin import (
diff --git a/src/llama_stack/providers/inline/post_training/common/validator.py b/src/llama_stack/providers/inline/post_training/common/validator.py
index 072dfcc9d..7a85d0e03 100644
--- a/src/llama_stack/providers/inline/post_training/common/validator.py
+++ b/src/llama_stack/providers/inline/post_training/common/validator.py
@@ -12,11 +12,7 @@
 from typing import Any
-from llama_stack_api.common.type_system import (
-    ChatCompletionInputType,
-    DialogType,
-    StringType,
-)
+from llama_stack_api import ChatCompletionInputType, DialogType, StringType
 from llama_stack.providers.utils.common.data_schema_validator import (
     ColumnName,
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
index 37eb9973b..f3f3d8d56 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
@@ -6,11 +6,11 @@
 from enum import Enum
 from typing import Any
-from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.datasets import Datasets
-from llama_stack_api.post_training import (
+from llama_stack_api import (
     AlgorithmConfig,
     Checkpoint,
+    DatasetIO,
+    Datasets,
     DPOAlignmentConfig,
     JobStatus,
     ListPostTrainingJobsResponse,
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
index bc9fb3b85..58a30618c 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
@@ -12,11 +12,11 @@
 from typing import Any
 import torch
 from datasets import Dataset
-from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.datasets import Datasets
-from llama_stack_api.post_training import (
+from llama_stack_api import (
     Checkpoint,
     DataConfig,
+    DatasetIO,
+    Datasets,
     LoraFinetuningConfig,
     TrainingConfig,
 )
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
index 45cfe2e8d..f7dc3ebf2 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
@@ -11,10 +11,10 @@
 from typing import Any
 import torch
 from datasets import Dataset
-from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.datasets import Datasets
-from llama_stack_api.post_training import (
+from llama_stack_api import (
     Checkpoint,
+    DatasetIO,
+    Datasets,
     DPOAlignmentConfig,
     TrainingConfig,
 )
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/utils.py b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
index 649d26f11..86c3c3f52 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/utils.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
@@ -14,8 +14,7 @@
 from typing import TYPE_CHECKING, Any, Protocol
 import psutil
 import torch
 from datasets import Dataset
-from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.post_training import Checkpoint, TrainingConfig
+from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig
 from transformers import AutoConfig, AutoModelForCausalLM
 if TYPE_CHECKING:
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
index d18d45575..1483b8385 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
@@ -13,7 +13,7 @@
 from collections.abc import Callable
 import torch
-from llama_stack_api.post_training import DatasetFormat
+from llama_stack_api import DatasetFormat
 from pydantic import BaseModel
 from torchtune.data._messages import InputOutputToMessages, ShareGPTToMessages
 from torchtune.models.llama3 import llama3_tokenizer
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
index 2cb01ed41..3370d42fa 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
@@ -6,11 +6,11 @@
 from enum import Enum
 from typing import Any
-from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.datasets import Datasets
-from llama_stack_api.post_training import (
+from llama_stack_api import (
     AlgorithmConfig,
     Checkpoint,
+    DatasetIO,
+    Datasets,
     DPOAlignmentConfig,
     JobStatus,
     ListPostTrainingJobsResponse,
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
index bf221c513..2bf1d0fe7 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
@@ -12,14 +12,14 @@
 from pathlib import Path
 from typing import Any
 import torch
-from llama_stack_api.common.training_types import PostTrainingMetric
-from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.datasets import Datasets
-from llama_stack_api.post_training import (
+from llama_stack_api import (
     Checkpoint,
     DataConfig,
+    DatasetIO,
+    Datasets,
     LoraFinetuningConfig,
     OptimizerConfig,
+    PostTrainingMetric,
     QATFinetuningConfig,
     TrainingConfig,
 )
diff --git a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
index 1732c6bae..80e907c10 100644
--- a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
+++ b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
@@ -10,16 +10,16 @@
 from typing import TYPE_CHECKING, Any
 if TYPE_CHECKING:
     from codeshield.cs import CodeShieldScanResult
-from llama_stack_api.inference import OpenAIMessageParam
-from llama_stack_api.safety import (
+from llama_stack_api import (
     ModerationObject,
     ModerationObjectResults,
+    OpenAIMessageParam,
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    Shield,
     ViolationLevel,
 )
-from llama_stack_api.shields import Shield
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.prompt_adapter import (
diff --git a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
index beaba4572..36e4280b9 100644
--- a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
+++ b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
@@ -9,23 +9,22 @@
 import uuid
 from string import Template
 from typing import Any
-from llama_stack_api.common.content_types import ImageContentItem, TextContentItem
-from llama_stack_api.datatypes import ShieldsProtocolPrivate
-from llama_stack_api.inference import (
+from llama_stack_api import (
+    ImageContentItem,
     Inference,
+    ModerationObject,
+    ModerationObjectResults,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAIMessageParam,
     OpenAIUserMessageParam,
-)
-from llama_stack_api.safety import (
-    ModerationObject,
-    ModerationObjectResults,
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    Shield,
+    ShieldsProtocolPrivate,
+    TextContentItem,
     ViolationLevel,
 )
-from llama_stack_api.shields import Shield
 from llama_stack.core.datatypes import Api
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
index 531972478..b4f495f19 100644
--- a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
+++ b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
@@ -7,17 +7,17 @@
 from typing import Any
 import torch
-from llama_stack_api.datatypes import ShieldsProtocolPrivate
-from llama_stack_api.inference import OpenAIMessageParam
-from llama_stack_api.safety import (
+from llama_stack_api import (
     ModerationObject,
+    OpenAIMessageParam,
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    Shield,
+    ShieldsProtocolPrivate,
     ShieldStore,
     ViolationLevel,
 )
-from llama_stack_api.shields import Shield
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 from llama_stack.core.utils.model_utils import model_local_dir
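With every consumer converted, a lightweight check can keep submodule imports from creeping back in. A minimal sketch of such a guard (hypothetical helper, not part of this patch):

    import pathlib
    import re
    import sys

    # Match 'from llama_stack_api.<anything> import ...' (the banned form).
    BANNED = re.compile(r"^\s*from llama_stack_api\.", re.MULTILINE)

    def offenders(paths: list[str]) -> list[str]:
        return [p for p in paths if BANNED.search(pathlib.Path(p).read_text())]

    if __name__ == "__main__":
        bad = offenders(sys.argv[1:])
        for path in bad:
            print(f"submodule import found in {path}")
        sys.exit(1 if bad else 0)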
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring.py b/src/llama_stack/providers/inline/scoring/basic/scoring.py
index d2e2aea8d..326fd9211 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring.py
@@ -5,16 +5,17 @@
 # the root directory of this source tree.
 from typing import Any
-from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.datasets import Datasets
-from llama_stack_api.datatypes import ScoringFunctionsProtocolPrivate
-from llama_stack_api.scoring import (
+from llama_stack_api import (
+    DatasetIO,
+    Datasets,
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
+    ScoringFn,
+    ScoringFnParams,
+    ScoringFunctionsProtocolPrivate,
     ScoringResult,
 )
-from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams
 from llama_stack.core.datatypes import Api
 from llama_stack.providers.utils.common.data_schema_validator import (
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
index d5525b27e..93c2627dd 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
@@ -8,8 +8,7 @@
 import json
 import re
 from typing import Any
-from llama_stack_api.scoring import ScoringResultRow
-from llama_stack_api.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
index fd691af59..382c64d88 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
@@ -6,8 +6,7 @@
 from typing import Any
-from llama_stack_api.scoring import ScoringResultRow
-from llama_stack_api.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
index af1af88a1..a7305d13a 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
index 34c4ee0ef..f7d2f32ae 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
index 8710ea01d..a2ed1d695 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
index 34286927d..4e2b49a1f 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
@@ -4,9 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
+    NumberType,
     RegexParserScoringFnParams,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
index 3699ed93a..df0cf52d9 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
@@ -4,9 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
+    NumberType,
     RegexParserScoringFnParams,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
index b05923a4c..1f143c4a6 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
index 232bd36b0..4ec85bb09 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
@@ -6,8 +6,7 @@
 from typing import Any
-from llama_stack_api.scoring import ScoringResultRow
-from llama_stack_api.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
index 95892963f..4e9d49e96 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
@@ -5,8 +5,7 @@
 # the root directory of this source tree.
 from typing import Any
-from llama_stack_api.scoring import ScoringResultRow
-from llama_stack_api.scoring_functions import ScoringFnParams, ScoringFnParamsType
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
index 6bb5bf118..7f213b38c 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
@@ -6,8 +6,7 @@
 import re
 from typing import Any
-from llama_stack_api.scoring import ScoringResultRow
-from llama_stack_api.scoring_functions import ScoringFnParams, ScoringFnParamsType
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
index a2c8140c6..b291924d5 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
@@ -6,8 +6,7 @@
 from typing import Any
-from llama_stack_api.scoring import ScoringResultRow
-from llama_stack_api.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
index 4ed8f95aa..cbab93c74 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
@@ -17,17 +17,18 @@ from autoevals.ragas import (
     ContextRelevancy,
     Faithfulness,
 )
-from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.datasets import Datasets
-from llama_stack_api.datatypes import ScoringFunctionsProtocolPrivate
-from llama_stack_api.scoring import (
+from llama_stack_api import (
+    DatasetIO,
+    Datasets,
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
+    ScoringFn,
+    ScoringFnParams,
+    ScoringFunctionsProtocolPrivate,
     ScoringResult,
     ScoringResultRow,
 )
-from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams
 from pydantic import BaseModel
 from llama_stack.core.datatypes import Api
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
index c0e769fe3..b058305b4 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
index 3479473e1..d619d38a8 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
index 0c0e4a4a0..34354a1fc 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
index 0ecfb2adb..4092ccc4a 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
index bc4bfb999..2b32b9eec 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
index 9af908675..4d6547002 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
index 267f2ca6c..739dfd7bd 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
index e71558618..59ed5949b 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
index ffd242b61..96c36d226 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
index c53170643..aa636d2b3 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
@@ -5,17 +5,18 @@
 # the root directory of this source tree.
 from typing import Any
-from llama_stack_api.datasetio import DatasetIO
-from llama_stack_api.datasets import Datasets
-from llama_stack_api.datatypes import ScoringFunctionsProtocolPrivate
-from llama_stack_api.inference import Inference
-from llama_stack_api.scoring import (
+from llama_stack_api import (
+    DatasetIO,
+    Datasets,
+    Inference,
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
+    ScoringFn,
+    ScoringFnParams,
+    ScoringFunctionsProtocolPrivate,
     ScoringResult,
 )
-from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams
 from llama_stack.core.datatypes import Api
 from llama_stack.providers.utils.common.data_schema_validator import (
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
index 47c3a4e4e..ed26169a5 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     LLMAsJudgeScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
index 7e7c69b16..bffffd878 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
@@ -4,8 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack_api.common.type_system import NumberType -from llama_stack_api.scoring_functions import LLMAsJudgeScoringFnParams, ScoringFn +from llama_stack_api import LLMAsJudgeScoringFnParams, NumberType, ScoringFn llm_as_judge_base = ScoringFn( identifier="llm-as-judge::base", diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py index de0d15b69..169a4d8b7 100644 --- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py @@ -6,9 +6,7 @@ import re from typing import Any -from llama_stack_api.inference import Inference, OpenAIChatCompletionRequestWithExtraBody -from llama_stack_api.scoring import ScoringResultRow -from llama_stack_api.scoring_functions import ScoringFnParams +from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py index e958f1112..60117dc3d 100644 --- a/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py +++ b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.datatypes import Api +from llama_stack_api import Api from .config import RagToolRuntimeConfig diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py index b2f0d884b..f499989cb 100644 --- a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +++ b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py @@ -6,11 +6,12 @@ from jinja2 import Template -from llama_stack_api.common.content_types import InterleavedContent -from llama_stack_api.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam -from llama_stack_api.rag_tool import ( +from llama_stack_api import ( DefaultRAGQueryGeneratorConfig, + InterleavedContent, LLMRAGQueryGeneratorConfig, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIUserMessageParam, RAGQueryGenerator, RAGQueryGeneratorConfig, ) diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py index 9a5dc63b7..aacb7bb38 100644 --- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -12,25 +12,24 @@ from typing import Any import httpx from fastapi import UploadFile -from llama_stack_api.common.content_types import ( +from llama_stack_api import ( URL, + Files, + Inference, InterleavedContent, InterleavedContentItem, - TextContentItem, -) -from llama_stack_api.datatypes import ToolGroupsProtocolPrivate -from llama_stack_api.files import Files, OpenAIFilePurpose -from llama_stack_api.inference import Inference -from llama_stack_api.rag_tool import RAGDocument, RAGQueryConfig, RAGQueryResult -from llama_stack_api.tools import ( ListToolDefsResponse, + OpenAIFilePurpose, + QueryChunksResponse, + RAGDocument, + RAGQueryConfig, + RAGQueryResult, + TextContentItem, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, -) 
-from llama_stack_api.vector_io import ( - QueryChunksResponse, VectorIO, VectorStoreChunkingStrategyStatic, VectorStoreChunkingStrategyStaticConfig, diff --git a/src/llama_stack/providers/inline/vector_io/chroma/__init__.py b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py index a99c94012..155b8a0cb 100644 --- a/src/llama_stack/providers/inline/vector_io/chroma/__init__.py +++ b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.datatypes import Api +from llama_stack_api import Api from .config import ChromaVectorIOConfig diff --git a/src/llama_stack/providers/inline/vector_io/chroma/config.py b/src/llama_stack/providers/inline/vector_io/chroma/config.py index c9620480a..d955b1d06 100644 --- a/src/llama_stack/providers/inline/vector_io/chroma/config.py +++ b/src/llama_stack/providers/inline/vector_io/chroma/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference diff --git a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py index 6c7a4efd8..b834589e3 100644 --- a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py +++ b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.datatypes import Api +from llama_stack_api import Api from .config import FaissVectorIOConfig diff --git a/src/llama_stack/providers/inline/vector_io/faiss/config.py b/src/llama_stack/providers/inline/vector_io/faiss/config.py index d0afdd87a..dd433f818 100644 --- a/src/llama_stack/providers/inline/vector_io/faiss/config.py +++ b/src/llama_stack/providers/inline/vector_io/faiss/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel from llama_stack.core.storage.datatypes import KVStoreReference diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py index 4c5362a8d..abef42499 100644 --- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -12,12 +12,19 @@ from typing import Any import faiss # type: ignore[import-untyped] import numpy as np -from llama_stack_api.common.errors import VectorStoreNotFoundError -from llama_stack_api.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate -from llama_stack_api.files import Files -from llama_stack_api.inference import Inference, InterleavedContent -from llama_stack_api.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack_api.vector_stores import VectorStore +from llama_stack_api import ( + Chunk, + Files, + HealthResponse, + HealthStatus, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/inline/vector_io/milvus/__init__.py b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py index 4aa9db7ec..2f84769f3 100644 --- a/src/llama_stack/providers/inline/vector_io/milvus/__init__.py +++ 
b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.datatypes import Api +from llama_stack_api import Api from .config import MilvusVectorIOConfig diff --git a/src/llama_stack/providers/inline/vector_io/milvus/config.py b/src/llama_stack/providers/inline/vector_io/milvus/config.py index c39f2bc4f..08d05c991 100644 --- a/src/llama_stack/providers/inline/vector_io/milvus/config.py +++ b/src/llama_stack/providers/inline/vector_io/milvus/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py index f0caeeda6..145d19455 100644 --- a/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py +++ b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.datatypes import Api +from llama_stack_api import Api from .config import QdrantVectorIOConfig diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/config.py b/src/llama_stack/providers/inline/vector_io/qdrant/config.py index 6041b16a8..437d643f0 100644 --- a/src/llama_stack/providers/inline/vector_io/qdrant/config.py +++ b/src/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -7,7 +7,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel from llama_stack.core.storage.datatypes import KVStoreReference diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py index acaf24eb9..e84c299dc 100644 --- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.datatypes import Api +from llama_stack_api import Api from .config import SQLiteVectorIOConfig diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 4a5d90f9a..e979ff323 100644 --- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -12,12 +12,16 @@ from typing import Any import numpy as np import sqlite_vec # type: ignore[import-untyped] -from llama_stack_api.common.errors import VectorStoreNotFoundError -from llama_stack_api.datatypes import VectorStoresProtocolPrivate -from llama_stack_api.files import Files -from llama_stack_api.inference import Inference -from llama_stack_api.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack_api.vector_stores import VectorStore +from llama_stack_api import ( + Chunk, + Files, + Inference, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py index 824741ed1..bd204cecd 100644 --- a/src/llama_stack/providers/registry/agents.py +++ b/src/llama_stack/providers/registry/agents.py @@ -5,7 +5,7 
@@ # the root directory of this source tree. -from llama_stack_api.datatypes import ( +from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, diff --git a/src/llama_stack/providers/registry/batches.py b/src/llama_stack/providers/registry/batches.py index 0f64d4a48..e11bb8332 100644 --- a/src/llama_stack/providers/registry/batches.py +++ b/src/llama_stack/providers/registry/batches.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack_api.datatypes import Api, InlineProviderSpec, ProviderSpec +from llama_stack_api import Api, InlineProviderSpec, ProviderSpec def available_providers() -> list[ProviderSpec]: diff --git a/src/llama_stack/providers/registry/datasetio.py b/src/llama_stack/providers/registry/datasetio.py index fd8055cbb..bfd7ede3c 100644 --- a/src/llama_stack/providers/registry/datasetio.py +++ b/src/llama_stack/providers/registry/datasetio.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack_api.datatypes import ( +from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, diff --git a/src/llama_stack/providers/registry/eval.py b/src/llama_stack/providers/registry/eval.py index 5fe973b33..9c8b1eebd 100644 --- a/src/llama_stack/providers/registry/eval.py +++ b/src/llama_stack/providers/registry/eval.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack_api.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec +from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec def available_providers() -> list[ProviderSpec]: diff --git a/src/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py index d1e7c8fba..dfc527816 100644 --- a/src/llama_stack/providers/registry/files.py +++ b/src/llama_stack/providers/registry/files.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec +from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages diff --git a/src/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py index 1a4efd49e..819e5aff5 100644 --- a/src/llama_stack/providers/registry/inference.py +++ b/src/llama_stack/providers/registry/inference.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack_api.datatypes import ( +from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, diff --git a/src/llama_stack/providers/registry/post_training.py b/src/llama_stack/providers/registry/post_training.py index 6b0ad3ecd..a5529b714 100644 --- a/src/llama_stack/providers/registry/post_training.py +++ b/src/llama_stack/providers/registry/post_training.py @@ -7,7 +7,7 @@ from typing import cast -from llama_stack_api.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec +from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec # We provide two versions of these providers so that distributions can package the appropriate version of torch. # The CPU version is used for distributions that don't have GPU support -- they result in smaller container images. 
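Every registry conversion above and below follows the same before/after shape; as a minimal sketch of the convention being enforced (using only symbols that appear in this patch's own import lists, not any new API):

```python
# Before: submodule imports that reach into the package's internal layout.
# from llama_stack_api.datatypes import Api, InlineProviderSpec, ProviderSpec
# from llama_stack_api.schema_utils import json_schema_type

# After: the same symbols, consumed only through the top-level public API.
from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, json_schema_type
```

Multi-submodule imports (e.g., `datatypes` + `files` + `inference` + `vector_io` in the vector store providers) collapse the same way into a single sorted `from llama_stack_api import (...)` block.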
diff --git a/src/llama_stack/providers/registry/safety.py b/src/llama_stack/providers/registry/safety.py index 307911ebf..c9dbbce24 100644 --- a/src/llama_stack/providers/registry/safety.py +++ b/src/llama_stack/providers/registry/safety.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack_api.datatypes import ( +from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, diff --git a/src/llama_stack/providers/registry/scoring.py b/src/llama_stack/providers/registry/scoring.py index 58679a97c..45c5dbed7 100644 --- a/src/llama_stack/providers/registry/scoring.py +++ b/src/llama_stack/providers/registry/scoring.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack_api.datatypes import Api, InlineProviderSpec, ProviderSpec +from llama_stack_api import Api, InlineProviderSpec, ProviderSpec def available_providers() -> list[ProviderSpec]: diff --git a/src/llama_stack/providers/registry/tool_runtime.py b/src/llama_stack/providers/registry/tool_runtime.py index 7b09881d3..3f0a83a30 100644 --- a/src/llama_stack/providers/registry/tool_runtime.py +++ b/src/llama_stack/providers/registry/tool_runtime.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack_api.datatypes import ( +from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, diff --git a/src/llama_stack/providers/registry/vector_io.py b/src/llama_stack/providers/registry/vector_io.py index b079a3644..a00941586 100644 --- a/src/llama_stack/providers/registry/vector_io.py +++ b/src/llama_stack/providers/registry/vector_io.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack_api.datatypes import ( +from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, @@ -244,7 +244,7 @@ Two ranker types are supported: Example using RAGQueryConfig with different search modes: ```python -from llama_stack_api.rag_tool import RAGQueryConfig, RRFRanker, WeightedRanker +from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker # Vector search config = RAGQueryConfig(mode="vector", max_chunks=5) diff --git a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py index 03cfb2bac..1260ce644 100644 --- a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py +++ b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py @@ -6,10 +6,7 @@ from typing import Any from urllib.parse import parse_qs, urlparse -from llama_stack_api.common.responses import PaginatedResponse -from llama_stack_api.datasetio import DatasetIO -from llama_stack_api.datasets import Dataset -from llama_stack_api.datatypes import DatasetsProtocolPrivate +from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.pagination import paginate_records diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py index b222f9962..cb674b0d7 100644 --- a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py +++ b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py @@ -7,10 +7,7 @@ from typing import Any import aiohttp -from llama_stack_api.common.content_types import URL -from llama_stack_api.common.responses import PaginatedResponse -from llama_stack_api.common.type_system import ParamType -from llama_stack_api.datasets import 
Dataset +from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType from .config import NvidiaDatasetIOConfig diff --git a/src/llama_stack/providers/remote/eval/nvidia/eval.py b/src/llama_stack/providers/remote/eval/nvidia/eval.py index 0b4113177..fbdec0d4d 100644 --- a/src/llama_stack/providers/remote/eval/nvidia/eval.py +++ b/src/llama_stack/providers/remote/eval/nvidia/eval.py @@ -6,15 +6,21 @@ from typing import Any import requests -from llama_stack_api.agents import Agents -from llama_stack_api.benchmarks import Benchmark -from llama_stack_api.common.job_types import Job, JobStatus -from llama_stack_api.datasetio import DatasetIO -from llama_stack_api.datasets import Datasets -from llama_stack_api.datatypes import BenchmarksProtocolPrivate -from llama_stack_api.eval import BenchmarkConfig, Eval, EvaluateResponse -from llama_stack_api.inference import Inference -from llama_stack_api.scoring import Scoring, ScoringResult +from llama_stack_api import ( + Agents, + Benchmark, + BenchmarkConfig, + BenchmarksProtocolPrivate, + DatasetIO, + Datasets, + Eval, + EvaluateResponse, + Inference, + Job, + JobStatus, + Scoring, + ScoringResult, +) from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/src/llama_stack/providers/remote/files/openai/files.py b/src/llama_stack/providers/remote/files/openai/files.py index 3db553c40..bbd630977 100644 --- a/src/llama_stack/providers/remote/files/openai/files.py +++ b/src/llama_stack/providers/remote/files/openai/files.py @@ -8,15 +8,15 @@ from datetime import UTC, datetime from typing import Annotated, Any from fastapi import Depends, File, Form, Response, UploadFile -from llama_stack_api.common.errors import ResourceNotFoundError -from llama_stack_api.common.responses import Order -from llama_stack_api.files import ( +from llama_stack_api import ( ExpiresAfter, Files, ListOpenAIFileResponse, OpenAIFileDeleteResponse, OpenAIFileObject, OpenAIFilePurpose, + Order, + ResourceNotFoundError, ) from llama_stack.core.datatypes import AccessRule diff --git a/src/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py index 2a98bc620..14f1e3852 100644 --- a/src/llama_stack/providers/remote/files/s3/files.py +++ b/src/llama_stack/providers/remote/files/s3/files.py @@ -17,15 +17,15 @@ from fastapi import Depends, File, Form, Response, UploadFile if TYPE_CHECKING: from mypy_boto3_s3.client import S3Client -from llama_stack_api.common.errors import ResourceNotFoundError -from llama_stack_api.common.responses import Order -from llama_stack_api.files import ( +from llama_stack_api import ( ExpiresAfter, Files, ListOpenAIFileResponse, OpenAIFileDeleteResponse, OpenAIFileObject, OpenAIFilePurpose, + Order, + ResourceNotFoundError, ) from llama_stack.core.datatypes import AccessRule diff --git a/src/llama_stack/providers/remote/inference/anthropic/config.py b/src/llama_stack/providers/remote/inference/anthropic/config.py index fda53179e..7ee4c54e2 100644 --- a/src/llama_stack/providers/remote/inference/anthropic/config.py +++ b/src/llama_stack/providers/remote/inference/anthropic/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/azure/config.py 
b/src/llama_stack/providers/remote/inference/azure/config.py index 5986a54e4..596f6c234 100644 --- a/src/llama_stack/providers/remote/inference/azure/config.py +++ b/src/llama_stack/providers/remote/inference/azure/config.py @@ -7,7 +7,7 @@ import os from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, HttpUrl, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py index 491cda76c..1a9fe533b 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -6,7 +6,7 @@ from collections.abc import AsyncIterator, Iterable -from llama_stack_api.inference import ( +from llama_stack_api import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, diff --git a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py index 90f7c550e..c7f3111f9 100644 --- a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -6,7 +6,7 @@ from urllib.parse import urljoin -from llama_stack_api.inference import ( +from llama_stack_api import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) diff --git a/src/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py index feadafef7..a1fd41e2d 100644 --- a/src/llama_stack/providers/remote/inference/cerebras/config.py +++ b/src/llama_stack/providers/remote/inference/cerebras/config.py @@ -7,7 +7,7 @@ import os from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py index 7d48de2e6..4974593d2 100644 --- a/src/llama_stack/providers/remote/inference/databricks/config.py +++ b/src/llama_stack/providers/remote/inference/databricks/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py index 4f0327b4f..8b802379f 100644 --- a/src/llama_stack/providers/remote/inference/databricks/databricks.py +++ b/src/llama_stack/providers/remote/inference/databricks/databricks.py @@ -7,7 +7,7 @@ from collections.abc import Iterable from databricks.sdk import WorkspaceClient -from llama_stack_api.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody +from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin diff --git 
a/src/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py index 63215fe16..d786655eb 100644 --- a/src/llama_stack/providers/remote/inference/fireworks/config.py +++ b/src/llama_stack/providers/remote/inference/fireworks/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/gemini/config.py b/src/llama_stack/providers/remote/inference/gemini/config.py index 8f8247ecd..6c25c005c 100644 --- a/src/llama_stack/providers/remote/inference/gemini/config.py +++ b/src/llama_stack/providers/remote/inference/gemini/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py index af3a567e0..79d694f06 100644 --- a/src/llama_stack/providers/remote/inference/gemini/gemini.py +++ b/src/llama_stack/providers/remote/inference/gemini/gemini.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.inference import ( +from llama_stack_api import ( OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, diff --git a/src/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py index 062259a84..cec327716 100644 --- a/src/llama_stack/providers/remote/inference/groq/config.py +++ b/src/llama_stack/providers/remote/inference/groq/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py index 5673dcc87..c16311830 100644 --- a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py +++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py index 61587bcd3..1dea3e3cb 100644 --- a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py +++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack_api.inference import ( +from llama_stack_api import ( OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAIEmbeddingsRequestWithExtraBody, diff --git a/src/llama_stack/providers/remote/inference/nvidia/__init__.py b/src/llama_stack/providers/remote/inference/nvidia/__init__.py index fb3ff090a..b89b2a750 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/__init__.py +++ b/src/llama_stack/providers/remote/inference/nvidia/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api.inference import Inference +from llama_stack_api import Inference from .config import NVIDIAConfig diff --git a/src/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py index 6048a0776..6ff98d290 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/config.py +++ b/src/llama_stack/providers/remote/inference/nvidia/config.py @@ -7,7 +7,7 @@ import os from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py index fc09f9448..9e4c6f559 100644 --- a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -8,13 +8,14 @@ from collections.abc import Iterable import aiohttp -from llama_stack_api.inference import ( +from llama_stack_api import ( + Model, + ModelType, OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartTextParam, RerankData, RerankResponse, ) -from llama_stack_api.models import Model, ModelType from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin diff --git a/src/llama_stack/providers/remote/inference/oci/__init__.py b/src/llama_stack/providers/remote/inference/oci/__init__.py index e21e64ee7..b7d6125f3 100644 --- a/src/llama_stack/providers/remote/inference/oci/__init__.py +++ b/src/llama_stack/providers/remote/inference/oci/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack_api.inference import InferenceProvider +from llama_stack_api import InferenceProvider from .config import OCIConfig diff --git a/src/llama_stack/providers/remote/inference/oci/config.py b/src/llama_stack/providers/remote/inference/oci/config.py index bd2d4f84d..24b4ad926 100644 --- a/src/llama_stack/providers/remote/inference/oci/config.py +++ b/src/llama_stack/providers/remote/inference/oci/config.py @@ -7,7 +7,7 @@ import os from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/oci/oci.py b/src/llama_stack/providers/remote/inference/oci/oci.py index 2dedfd434..36e56cf6c 100644 --- a/src/llama_stack/providers/remote/inference/oci/oci.py +++ b/src/llama_stack/providers/remote/inference/oci/oci.py @@ -10,11 +10,11 @@ from typing import Any import httpx import oci -from llama_stack_api.inference import ( +from llama_stack_api import ( + ModelType, OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from llama_stack_api.models import ModelType from oci.generative_ai.generative_ai_client import GenerativeAiClient from oci.generative_ai.models import ModelCollection from openai._base_client import DefaultAsyncHttpxClient diff --git a/src/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py index 35022eedf..6a471429e 100644 --- a/src/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/src/llama_stack/providers/remote/inference/ollama/ollama.py @@ -7,12 +7,12 @@ import asyncio -from llama_stack_api.common.errors import UnsupportedModelError -from llama_stack_api.datatypes import ( +from llama_stack_api import ( HealthResponse, HealthStatus, + Model, + UnsupportedModelError, ) -from llama_stack_api.models import Model from ollama import AsyncClient as AsyncOllamaClient from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py index 54c239ed8..cbb01b2d0 100644 --- a/src/llama_stack/providers/remote/inference/openai/config.py +++ b/src/llama_stack/providers/remote/inference/openai/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py index 5775fdf3c..7045dbf2e 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/config.py +++ b/src/llama_stack/providers/remote/inference/passthrough/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py index 01f2b5619..19cf0c5d7 100644 --- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ 
b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -6,8 +6,9 @@ from collections.abc import AsyncIterator -from llama_stack_api.inference import ( +from llama_stack_api import ( Inference, + Model, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, @@ -16,7 +17,6 @@ from llama_stack_api.inference import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from llama_stack_api.models import Model from openai import AsyncOpenAI from llama_stack.core.request_headers import NeedsRequestProviderData diff --git a/src/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py index b6bd6fba6..aaa4230a8 100644 --- a/src/llama_stack/providers/remote/inference/runpod/config.py +++ b/src/llama_stack/providers/remote/inference/runpod/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py index c2ea2c521..4596b2df5 100644 --- a/src/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py @@ -6,7 +6,7 @@ from collections.abc import AsyncIterator -from llama_stack_api.inference import ( +from llama_stack_api import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, diff --git a/src/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py index 129052468..6d72e7205 100644 --- a/src/llama_stack/providers/remote/inference/sambanova/config.py +++ b/src/llama_stack/providers/remote/inference/sambanova/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py index 662d94935..051a2afa3 100644 --- a/src/llama_stack/providers/remote/inference/tgi/config.py +++ b/src/llama_stack/providers/remote/inference/tgi/config.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
-from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py index 36684090d..831a26e39 100644 --- a/src/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py @@ -8,7 +8,7 @@ from collections.abc import Iterable from huggingface_hub import AsyncInferenceClient, HfApi -from llama_stack_api.inference import ( +from llama_stack_api import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) diff --git a/src/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py index 945ccac23..96c0538e3 100644 --- a/src/llama_stack/providers/remote/inference/together/config.py +++ b/src/llama_stack/providers/remote/inference/together/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py index 06dbf26c7..f1355a760 100644 --- a/src/llama_stack/providers/remote/inference/together/together.py +++ b/src/llama_stack/providers/remote/inference/together/together.py @@ -8,12 +8,12 @@ from collections.abc import Iterable from typing import Any, cast -from llama_stack_api.inference import ( +from llama_stack_api import ( + Model, OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) -from llama_stack_api.models import Model from together import AsyncTogether # type: ignore[import-untyped] from together.constants import BASE_URL # type: ignore[import-untyped] diff --git a/src/llama_stack/providers/remote/inference/vertexai/config.py b/src/llama_stack/providers/remote/inference/vertexai/config.py index 45ae64226..53e2b3e65 100644 --- a/src/llama_stack/providers/remote/inference/vertexai/config.py +++ b/src/llama_stack/providers/remote/inference/vertexai/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py index 5708f20f7..23f713961 100644 --- a/src/llama_stack/providers/remote/inference/vllm/config.py +++ b/src/llama_stack/providers/remote/inference/vllm/config.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import Field, SecretStr, field_validator from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py index 24bde4a8d..f7938c22c 100644 --- a/src/llama_stack/providers/remote/inference/vllm/vllm.py +++ 
b/src/llama_stack/providers/remote/inference/vllm/vllm.py @@ -7,11 +7,9 @@ from collections.abc import AsyncIterator from urllib.parse import urljoin import httpx -from llama_stack_api.datatypes import ( +from llama_stack_api import ( HealthResponse, HealthStatus, -) -from llama_stack_api.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, diff --git a/src/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py index c58828fc2..1bba040ef 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/config.py +++ b/src/llama_stack/providers/remote/inference/watsonx/config.py @@ -7,7 +7,7 @@ import os from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py index 1b999ba09..de23c25d7 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -9,7 +9,9 @@ from typing import Any import litellm import requests -from llama_stack_api.inference import ( +from llama_stack_api import ( + Model, + ModelType, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, @@ -19,7 +21,6 @@ from llama_stack_api.inference import ( OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from llama_stack_api.models import Model, ModelType from llama_stack.core.telemetry.tracing import get_current_span from llama_stack.log import get_logger @@ -237,7 +238,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): ) # Convert response to OpenAI format - from llama_stack_api.inference import OpenAIEmbeddingUsage + from llama_stack_api import OpenAIEmbeddingUsage from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response diff --git a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py index 7fa1bd89c..02c35241b 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py +++ b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py @@ -8,7 +8,7 @@ from datetime import datetime from typing import Any, Literal import aiohttp -from llama_stack_api.post_training import ( +from llama_stack_api import ( AlgorithmConfig, DPOAlignmentConfig, JobStatus, diff --git a/src/llama_stack/providers/remote/post_training/nvidia/utils.py b/src/llama_stack/providers/remote/post_training/nvidia/utils.py index b1f638f27..78762155d 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/utils.py +++ b/src/llama_stack/providers/remote/post_training/nvidia/utils.py @@ -7,7 +7,7 @@ import warnings from typing import Any -from llama_stack_api.post_training import TrainingConfig +from llama_stack_api import TrainingConfig from pydantic import BaseModel from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py index 12e17802b..86b93c32e 100644 --- a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py +++ 
b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py @@ -7,15 +7,15 @@ import json from typing import Any -from llama_stack_api.datatypes import ShieldsProtocolPrivate -from llama_stack_api.inference import OpenAIMessageParam -from llama_stack_api.safety import ( +from llama_stack_api import ( + OpenAIMessageParam, RunShieldResponse, Safety, SafetyViolation, + Shield, + ShieldsProtocolPrivate, ViolationLevel, ) -from llama_stack_api.shields import Shield from llama_stack.log import get_logger from llama_stack.providers.utils.bedrock.client import create_bedrock_client diff --git a/src/llama_stack/providers/remote/safety/bedrock/config.py b/src/llama_stack/providers/remote/safety/bedrock/config.py index 9e8f52b11..ca28924d4 100644 --- a/src/llama_stack/providers/remote/safety/bedrock/config.py +++ b/src/llama_stack/providers/remote/safety/bedrock/config.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig diff --git a/src/llama_stack/providers/remote/safety/nvidia/config.py b/src/llama_stack/providers/remote/safety/nvidia/config.py index 105d460c7..fc686ae73 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/config.py +++ b/src/llama_stack/providers/remote/safety/nvidia/config.py @@ -6,7 +6,7 @@ import os from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field diff --git a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py index d40c26f77..b3b5090e0 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -7,10 +7,16 @@ from typing import Any import requests -from llama_stack_api.datatypes import ShieldsProtocolPrivate -from llama_stack_api.inference import OpenAIMessageParam -from llama_stack_api.safety import ModerationObject, RunShieldResponse, Safety, SafetyViolation, ViolationLevel -from llama_stack_api.shields import Shield +from llama_stack_api import ( + ModerationObject, + OpenAIMessageParam, + RunShieldResponse, + Safety, + SafetyViolation, + Shield, + ShieldsProtocolPrivate, + ViolationLevel, +) from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/remote/safety/sambanova/config.py b/src/llama_stack/providers/remote/safety/sambanova/config.py index eea1d6455..a8e745851 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/config.py +++ b/src/llama_stack/providers/remote/safety/sambanova/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr diff --git a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py index 14ef39431..119ebb6ed 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -8,15 +8,15 @@ from typing import Any import litellm import requests -from llama_stack_api.datatypes import ShieldsProtocolPrivate -from llama_stack_api.inference import OpenAIMessageParam -from llama_stack_api.safety import ( +from llama_stack_api import ( + OpenAIMessageParam, RunShieldResponse, Safety, 
SafetyViolation, + Shield, + ShieldsProtocolPrivate, ViolationLevel, ) -from llama_stack_api.shields import Shield from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py index 11a917432..84e47dd4f 100644 --- a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py @@ -8,12 +8,12 @@ import json from typing import Any import httpx -from llama_stack_api.common.content_types import URL -from llama_stack_api.datatypes import ToolGroupsProtocolPrivate -from llama_stack_api.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) diff --git a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py index 9247e5543..b7eee776a 100644 --- a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py @@ -7,12 +7,12 @@ from typing import Any import httpx -from llama_stack_api.common.content_types import URL -from llama_stack_api.datatypes import ToolGroupsProtocolPrivate -from llama_stack_api.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py index 7beaebc5f..efb1eb2df 100644 --- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -7,11 +7,12 @@ from typing import Any from urllib.parse import urlparse -from llama_stack_api.common.content_types import URL -from llama_stack_api.datatypes import Api, ToolGroupsProtocolPrivate -from llama_stack_api.tools import ( +from llama_stack_api import ( + URL, + Api, ListToolDefsResponse, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) diff --git a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index 9fe525ca5..d65d66e67 100644 --- a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -8,12 +8,12 @@ import json from typing import Any import httpx -from llama_stack_api.common.content_types import URL -from llama_stack_api.datatypes import ToolGroupsProtocolPrivate -from llama_stack_api.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) diff --git a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py index ed4a9d4ba..9cc865092 100644 --- a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +++ 
b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py @@ -8,12 +8,12 @@ import json from typing import Any import httpx -from llama_stack_api.common.content_types import URL -from llama_stack_api.datatypes import ToolGroupsProtocolPrivate -from llama_stack_api.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) diff --git a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py index 685feb0b8..d774ea643 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import ChromaVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py index b7e6957cd..eca5d349b 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -9,11 +9,16 @@ from typing import Any from urllib.parse import urlparse import chromadb -from llama_stack_api.datatypes import VectorStoresProtocolPrivate -from llama_stack_api.files import Files -from llama_stack_api.inference import Inference, InterleavedContent -from llama_stack_api.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack_api.vector_stores import VectorStore +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/remote/vector_io/chroma/config.py b/src/llama_stack/providers/remote/vector_io/chroma/config.py index d4026f589..b1e4f9a4a 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference diff --git a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py index 1237b75d4..1b703d486 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack_api.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import MilvusVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/milvus/config.py b/src/llama_stack/providers/remote/vector_io/milvus/config.py index 317f7cd91..2e2c788c7 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, ConfigDict, Field from llama_stack.core.storage.datatypes import KVStoreReference diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index b7cc4066e..b856bf918 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -8,12 +8,17 @@ import asyncio import os from typing import Any -from llama_stack_api.common.errors import VectorStoreNotFoundError -from llama_stack_api.datatypes import VectorStoresProtocolPrivate -from llama_stack_api.files import Files -from llama_stack_api.inference import Inference, InterleavedContent -from llama_stack_api.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack_api.vector_stores import VectorStore +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py index e66644b2c..36018fd95 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack_api.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import PGVectorVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/config.py b/src/llama_stack/providers/remote/vector_io/pgvector/config.py index 1508a5a20..aeb1c83bb 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index e9b5664ae..8aa0303b6 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -8,12 +8,17 @@ import heapq from typing import Any import psycopg2 -from llama_stack_api.common.errors import VectorStoreNotFoundError -from llama_stack_api.datatypes import VectorStoresProtocolPrivate -from llama_stack_api.files import Files -from llama_stack_api.inference import Inference, InterleavedContent -from llama_stack_api.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack_api.vector_stores import VectorStore +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import Json, execute_values diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py index b56ff9811..b5b02fe59 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py
index b56ff9811..b5b02fe59 100644
--- a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py
+++ b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api.datatypes import Api, ProviderSpec
+from llama_stack_api import Api, ProviderSpec
 
 from .config import QdrantVectorIOConfig
diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/config.py b/src/llama_stack/providers/remote/vector_io/qdrant/config.py
index b8c776e5a..8cc4cbb2b 100644
--- a/src/llama_stack/providers/remote/vector_io/qdrant/config.py
+++ b/src/llama_stack/providers/remote/vector_io/qdrant/config.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from llama_stack_api.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel
 
 from llama_stack.core.storage.datatypes import KVStoreReference
diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
index 86ddb351a..53d6be2b6 100644
--- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@@ -9,18 +9,19 @@ import hashlib
 import uuid
 from typing import Any
 
-from llama_stack_api.common.errors import VectorStoreNotFoundError
-from llama_stack_api.datatypes import VectorStoresProtocolPrivate
-from llama_stack_api.files import Files
-from llama_stack_api.inference import Inference, InterleavedContent
-from llama_stack_api.vector_io import (
+from llama_stack_api import (
     Chunk,
+    Files,
+    Inference,
+    InterleavedContent,
     QueryChunksResponse,
     VectorIO,
+    VectorStore,
     VectorStoreChunkingStrategy,
     VectorStoreFileObject,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
 )
-from llama_stack_api.vector_stores import VectorStore
 from numpy.typing import NDArray
 from qdrant_client import AsyncQdrantClient, models
 from qdrant_client.models import PointStruct
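The qdrant hunk shows the less obvious half of the conversion: five separate submodule imports collapse into one parenthesized block, and the new names are interleaved into the existing list so the result stays in sorted order (case-sensitive, so `VectorStoreNotFoundError` lands before `VectorStoresProtocolPrivate`). A hedged before/after sketch of the same move on a smaller symbol set:

```python
# Before: callers reached into three internal modules.
#   from llama_stack_api.common.errors import VectorStoreNotFoundError
#   from llama_stack_api.vector_io import Chunk, VectorIO
#   from llama_stack_api.vector_stores import VectorStore
#
# After: one import from the package root, with the merged names slotted
# into ASCII sort order between the originals.
from llama_stack_api import (
    Chunk,
    VectorIO,
    VectorStore,
    VectorStoreNotFoundError,
)
```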
diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py
index 7ce2607ea..47546d459 100644
--- a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py
+++ b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api.datatypes import Api, ProviderSpec
+from llama_stack_api import Api, ProviderSpec
 
 from .config import WeaviateVectorIOConfig
diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/config.py b/src/llama_stack/providers/remote/vector_io/weaviate/config.py
index 4d6b37c71..19f9679fb 100644
--- a/src/llama_stack/providers/remote/vector_io/weaviate/config.py
+++ b/src/llama_stack/providers/remote/vector_io/weaviate/config.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from llama_stack_api.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
index 715daa045..c72666f63 100644
--- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@@ -8,13 +8,17 @@ from typing import Any
 
 import weaviate
 import weaviate.classes as wvc
-from llama_stack_api.common.content_types import InterleavedContent
-from llama_stack_api.common.errors import VectorStoreNotFoundError
-from llama_stack_api.datatypes import VectorStoresProtocolPrivate
-from llama_stack_api.files import Files
-from llama_stack_api.inference import Inference
-from llama_stack_api.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack_api.vector_stores import VectorStore
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 from numpy.typing import NDArray
 from weaviate.classes.init import Auth
 from weaviate.classes.query import Filter, HybridFusion
diff --git a/src/llama_stack/providers/utils/common/data_schema_validator.py b/src/llama_stack/providers/utils/common/data_schema_validator.py
index d05c656a6..7ef245779 100644
--- a/src/llama_stack/providers/utils/common/data_schema_validator.py
+++ b/src/llama_stack/providers/utils/common/data_schema_validator.py
@@ -7,11 +7,7 @@
 from enum import Enum
 from typing import Any
 
-from llama_stack_api.common.type_system import (
-    ChatCompletionInputType,
-    CompletionInputType,
-    StringType,
-)
+from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType
 
 from llama_stack.core.datatypes import Api
diff --git a/src/llama_stack/providers/utils/files/form_data.py b/src/llama_stack/providers/utils/files/form_data.py
index 6e3d3f4a7..21afbec2b 100644
--- a/src/llama_stack/providers/utils/files/form_data.py
+++ b/src/llama_stack/providers/utils/files/form_data.py
@@ -7,7 +7,7 @@
 import json
 
 from fastapi import Request
-from llama_stack_api.files import ExpiresAfter
+from llama_stack_api import ExpiresAfter
 from pydantic import BaseModel, ValidationError
diff --git a/src/llama_stack/providers/utils/inference/embedding_mixin.py b/src/llama_stack/providers/utils/inference/embedding_mixin.py
index aad158085..f7e5c711b 100644
--- a/src/llama_stack/providers/utils/inference/embedding_mixin.py
+++ b/src/llama_stack/providers/utils/inference/embedding_mixin.py
@@ -17,7 +17,7 @@ from llama_stack.log import get_logger
 if TYPE_CHECKING:
     from sentence_transformers import SentenceTransformer
 
-from llama_stack_api.inference import (
+from llama_stack_api import (
     ModelStore,
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py
index 79a23d249..3c707dd01 100644
--- a/src/llama_stack/providers/utils/inference/inference_store.py
+++ b/src/llama_stack/providers/utils/inference/inference_store.py
@@ -6,7 +6,7 @@
 import asyncio
 from typing import Any
 
-from llama_stack_api.inference import (
+from llama_stack_api import (
     ListOpenAIChatCompletionResponse,
     OpenAIChatCompletion,
     OpenAICompletionWithInputMessages,
diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
index cff3e2210..4f468725b 100644
--- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
+++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -9,7 +9,7 @@
 import struct
 from collections.abc import AsyncIterator
 
 import litellm
-from llama_stack_api.inference import (
+from llama_stack_api import (
     InferenceProvider,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
diff --git a/src/llama_stack/providers/utils/inference/model_registry.py b/src/llama_stack/providers/utils/inference/model_registry.py
index d8589ba4f..e7ca5ab74 100644
--- a/src/llama_stack/providers/utils/inference/model_registry.py
+++ b/src/llama_stack/providers/utils/inference/model_registry.py
@@ -6,9 +6,7 @@
 
 from typing import Any
 
-from llama_stack_api.common.errors import UnsupportedModelError
-from llama_stack_api.datatypes import ModelsProtocolPrivate
-from llama_stack_api.models import Model, ModelType
+from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.log import get_logger
diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py
index 3f9e4aa38..c97e42274 100644
--- a/src/llama_stack/providers/utils/inference/openai_compat.py
+++ b/src/llama_stack/providers/utils/inference/openai_compat.py
@@ -20,19 +20,17 @@ except ImportError:
     from openai.types.chat.chat_completion_message_tool_call import (
         ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall,
     )
-from llama_stack_api.common.content_types import (
+from llama_stack_api import (
     URL,
-    ImageContentItem,
-    TextContentItem,
-    _URLOrData,
-)
-from llama_stack_api.inference import (
     GreedySamplingStrategy,
+    ImageContentItem,
     JsonSchemaResponseFormat,
     OpenAIResponseFormatParam,
     SamplingParams,
+    TextContentItem,
     TopKSamplingStrategy,
     TopPSamplingStrategy,
+    _URLOrData,
 )
 from openai.types.chat import (
     ChatCompletionMessageToolCall,
diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py
index 0b41b092f..c05873df5 100644
--- a/src/llama_stack/providers/utils/inference/openai_mixin.py
+++ b/src/llama_stack/providers/utils/inference/openai_mixin.py
@@ -10,8 +10,9 @@ from abc import ABC, abstractmethod
 from collections.abc import AsyncIterator, Iterable
 from typing import Any
 
-from llama_stack_api.inference import (
+from llama_stack_api import (
     Model,
+    ModelType,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -23,7 +24,6 @@ from llama_stack_api.inference import (
     OpenAIEmbeddingUsage,
     OpenAIMessageParam,
 )
-from llama_stack_api.models import ModelType
 from openai import AsyncOpenAI
 from pydantic import BaseModel, ConfigDict
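One wrinkle in the openai_compat hunk above: the refactor re-exports even `_URLOrData`, an underscore-prefixed helper that provider utilities genuinely depend on. A leading underscore conventionally signals "do not import", so exposing it at the top level is best read as a deliberate exception; a short hedged sketch of the consumer-side convention:

```python
# The flat import is now the sanctioned spelling, underscore or not:
from llama_stack_api import _URLOrData

# Reaching inside would defeat the boundary this patch establishes:
# from llama_stack_api.common.content_types import _URLOrData  # no longer allowed
```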
diff --git a/src/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py
index 2c59319bc..ea01a34e9 100644
--- a/src/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/src/llama_stack/providers/utils/inference/prompt_adapter.py
@@ -12,14 +12,11 @@ import re
 from typing import Any
 
 import httpx
-from llama_stack_api.common.content_types import (
+from llama_stack_api import (
+    CompletionRequest,
     ImageContentItem,
     InterleavedContent,
     InterleavedContentItem,
-    TextContentItem,
-)
-from llama_stack_api.inference import (
-    CompletionRequest,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletionContentPartImageParam,
     OpenAIChatCompletionContentPartTextParam,
@@ -30,6 +27,7 @@ from llama_stack_api.inference import (
     OpenAIUserMessageParam,
     ResponseFormat,
     ResponseFormatType,
+    TextContentItem,
     ToolChoice,
 )
 from PIL import Image as PIL_Image
diff --git a/src/llama_stack/providers/utils/kvstore/sqlite/config.py b/src/llama_stack/providers/utils/kvstore/sqlite/config.py
index 41b6af906..895268a4f 100644
--- a/src/llama_stack/providers/utils/kvstore/sqlite/config.py
+++ b/src/llama_stack/providers/utils/kvstore/sqlite/config.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
diff --git a/src/llama_stack/providers/utils/memory/file_utils.py b/src/llama_stack/providers/utils/memory/file_utils.py
index bd0ceb26c..6786293c6 100644
--- a/src/llama_stack/providers/utils/memory/file_utils.py
+++ b/src/llama_stack/providers/utils/memory/file_utils.py
@@ -8,7 +8,7 @@
 import base64
 import mimetypes
 import os
 
-from llama_stack_api.common.content_types import URL
+from llama_stack_api import URL
 
 
 def data_url_from_file(file_path: str) -> URL:
diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index a89d53d11..68d1c11e5 100644
--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -13,14 +13,15 @@ from abc import ABC, abstractmethod
 from typing import Annotated, Any
 
 from fastapi import Body
-from llama_stack_api.common.errors import VectorStoreNotFoundError
-from llama_stack_api.files import Files, OpenAIFileObject
-from llama_stack_api.vector_io import (
+from llama_stack_api import (
     Chunk,
+    Files,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     OpenAICreateVectorStoreRequestWithExtraBody,
+    OpenAIFileObject,
     QueryChunksResponse,
     SearchRankingOptions,
+    VectorStore,
     VectorStoreChunkingStrategy,
     VectorStoreChunkingStrategyAuto,
     VectorStoreChunkingStrategyStatic,
@@ -37,11 +38,11 @@ from llama_stack_api.vector_io import (
     VectorStoreFileStatus,
     VectorStoreListFilesResponse,
     VectorStoreListResponse,
+    VectorStoreNotFoundError,
     VectorStoreObject,
     VectorStoreSearchResponse,
     VectorStoreSearchResponsePage,
 )
-from llama_stack_api.vector_stores import VectorStore
 from pydantic import TypeAdapter
 
 from llama_stack.core.id_generation import generate_object_id
diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py
index 775d47dbe..37ac79039 100644
--- a/src/llama_stack/providers/utils/memory/vector_store.py
+++ b/src/llama_stack/providers/utils/memory/vector_store.py
@@ -14,15 +14,17 @@ from urllib.parse import unquote
 
 import httpx
 import numpy as np
-from llama_stack_api.common.content_types import (
+from llama_stack_api import (
     URL,
+    Api,
+    Chunk,
+    ChunkMetadata,
     InterleavedContent,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    QueryChunksResponse,
+    RAGDocument,
+    VectorStore,
 )
-from llama_stack_api.datatypes import Api
-from llama_stack_api.inference import OpenAIEmbeddingsRequestWithExtraBody
-from llama_stack_api.rag_tool import RAGDocument
-from llama_stack_api.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
-from llama_stack_api.vector_stores import VectorStore
 from numpy.typing import NDArray
 from pydantic import BaseModel
diff --git a/src/llama_stack/providers/utils/pagination.py b/src/llama_stack/providers/utils/pagination.py
index edfd1e33b..d1d9e36c5 100644
--- a/src/llama_stack/providers/utils/pagination.py
+++ b/src/llama_stack/providers/utils/pagination.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from llama_stack_api.common.responses import PaginatedResponse
+from llama_stack_api import PaginatedResponse
 
 
 def paginate_records(
diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py
index 095a1e43a..fe7bca7a5 100644
--- a/src/llama_stack/providers/utils/responses/responses_store.py
+++ b/src/llama_stack/providers/utils/responses/responses_store.py
@@ -4,17 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api.agents import (
-    Order,
-)
-from llama_stack_api.inference import OpenAIMessageParam
-from llama_stack_api.openai_responses import (
+from llama_stack_api import (
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
     OpenAIDeleteResponseObject,
+    OpenAIMessageParam,
     OpenAIResponseInput,
     OpenAIResponseObject,
     OpenAIResponseObjectWithInput,
+    Order,
 )
 
 from llama_stack.core.datatypes import AccessRule
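A detail worth noticing in the responses_store hunk above: `Order` was previously imported here from `llama_stack_api.agents`, while other call sites in this patch (the unit files tests further down) pulled it from `llama_stack_api.common.responses`. Re-exporting it once at the top level leaves a single canonical spelling. A minimal hedged sketch, assuming the package is installed:

```python
# With the flat namespace there is exactly one way to name the sort-order
# enum, regardless of which internal module happens to define it.
from llama_stack_api import Order

# Enumerating members works for any Enum; the concrete values follow the
# OpenAI-style `order` list parameter these stores expose.
print(list(Order))
```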
diff --git a/src/llama_stack/providers/utils/scoring/aggregation_utils.py b/src/llama_stack/providers/utils/scoring/aggregation_utils.py
index ef59e0ed0..aa6fe7248 100644
--- a/src/llama_stack/providers/utils/scoring/aggregation_utils.py
+++ b/src/llama_stack/providers/utils/scoring/aggregation_utils.py
@@ -6,8 +6,7 @@
 import statistics
 from typing import Any
 
-from llama_stack_api.scoring import ScoringResultRow
-from llama_stack_api.scoring_functions import AggregationFunctionType
+from llama_stack_api import AggregationFunctionType, ScoringResultRow
 
 
 def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]:
diff --git a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py
index aee998c01..d16c75263 100644
--- a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py
+++ b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py
@@ -6,8 +6,7 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
-from llama_stack_api.scoring import ScoringFnParams, ScoringResultRow
-from llama_stack_api.scoring_functions import ScoringFn
+from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow
 
 from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
diff --git a/src/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py
index e21aba382..87880eeb8 100644
--- a/src/llama_stack/providers/utils/sqlstore/api.py
+++ b/src/llama_stack/providers/utils/sqlstore/api.py
@@ -8,7 +8,7 @@
 from collections.abc import Mapping, Sequence
 from enum import Enum
 from typing import Any, Literal, Protocol
 
-from llama_stack_api.common.responses import PaginatedResponse
+from llama_stack_api import PaginatedResponse
 from pydantic import BaseModel
diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
index 74c164c73..98d4f5a53 100644
--- a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
+++ b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
@@ -6,7 +6,7 @@
 from collections.abc import Mapping, Sequence
 from typing import Any, Literal, cast
 
-from llama_stack_api.common.responses import PaginatedResponse
+from llama_stack_api import PaginatedResponse
 from sqlalchemy import (
     JSON,
     Boolean,
diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py
index 8b2560cb5..82c85f46c 100644
--- a/src/llama_stack/providers/utils/tools/mcp.py
+++ b/src/llama_stack/providers/utils/tools/mcp.py
@@ -10,16 +10,14 @@ from enum import Enum
 from typing import Any, cast
 
 import httpx
-from llama_stack_api.common.content_types import (
+from llama_stack_api import (
     ImageContentItem,
     InterleavedContentItem,
-    TextContentItem,
-    _URLOrData,
-)
-from llama_stack_api.tools import (
     ListToolDefsResponse,
+    TextContentItem,
     ToolDef,
     ToolInvocationResult,
+    _URLOrData,
 )
 from mcp import ClientSession, McpError
 from mcp import types as mcp_types
diff --git a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py
index 7318a697f..9c399b7bf 100644
--- a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py
+++ b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py
@@ -6,9 +6,7 @@
 
 from typing import Protocol
 
-from llama_stack_api.datatypes import Api, ProviderSpec, RemoteProviderSpec
-from llama_stack_api.schema_utils import webmethod
-from llama_stack_api.version import LLAMA_STACK_API_V1
+from llama_stack_api import LLAMA_STACK_API_V1, Api, ProviderSpec, RemoteProviderSpec, webmethod
 
 
 def available_providers() -> list[ProviderSpec]:
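The weather test package above is the acid test for the boundary: an out-of-tree API that must build against nothing but the top-level namespace. A condensed, hypothetical sketch of the shape such a package takes; the route, method name, and return type below are invented for illustration, while `webmethod` and `LLAMA_STACK_API_V1` come straight from the import in the diff:

```python
from typing import Protocol

from llama_stack_api import LLAMA_STACK_API_V1, webmethod


class Weather(Protocol):
    """A toy external API surface, declared purely via public symbols."""

    @webmethod(route="/weather/locations", method="GET", level=LLAMA_STACK_API_V1)
    async def get_available_locations(self) -> dict[str, list[str]]:
        """List the locations for which forecasts exist."""
        ...
```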
diff --git a/tests/integration/batches/conftest.py b/tests/integration/batches/conftest.py
index 770578311..b9c0ac916 100644
--- a/tests/integration/batches/conftest.py
+++ b/tests/integration/batches/conftest.py
@@ -13,7 +13,7 @@
 from contextlib import contextmanager
 from io import BytesIO
 
 import pytest
-from llama_stack_api.files import OpenAIFilePurpose
+from llama_stack_api import OpenAIFilePurpose
 
 
 class BatchHelper:
diff --git a/tests/integration/files/test_files.py b/tests/integration/files/test_files.py
index f9cbf7987..61878ac4c 100644
--- a/tests/integration/files/test_files.py
+++ b/tests/integration/files/test_files.py
@@ -9,7 +9,7 @@
 from unittest.mock import patch
 
 import pytest
 import requests
-from llama_stack_api.files import OpenAIFilePurpose
+from llama_stack_api import OpenAIFilePurpose
 
 from llama_stack.core.datatypes import User
diff --git a/tests/integration/inference/test_provider_data_routing.py b/tests/integration/inference/test_provider_data_routing.py
index 0b4186c3c..d007b57d6 100644
--- a/tests/integration/inference/test_provider_data_routing.py
+++ b/tests/integration/inference/test_provider_data_routing.py
@@ -15,8 +15,8 @@ that enables routing based on provider_data alone.
 from unittest.mock import AsyncMock, patch
 
 import pytest
-from llama_stack_api.datatypes import Api
-from llama_stack_api.inference import (
+from llama_stack_api import (
+    Api,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionUsage,
diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py
index 8728d4aa4..ff6925b58 100644
--- a/tests/integration/post_training/test_post_training.py
+++ b/tests/integration/post_training/test_post_training.py
@@ -9,7 +9,7 @@
 import time
 import uuid
 
 import pytest
-from llama_stack_api.post_training import (
+from llama_stack_api import (
     DataConfig,
     DatasetFormat,
     DPOAlignmentConfig,
diff --git a/tests/integration/safety/test_llama_guard.py b/tests/integration/safety/test_llama_guard.py
index b88270a9f..99b4982f0 100644
--- a/tests/integration/safety/test_llama_guard.py
+++ b/tests/integration/safety/test_llama_guard.py
@@ -12,7 +12,7 @@
 import warnings
 from collections.abc import Generator
 
 import pytest
-from llama_stack_api.safety import ViolationLevel
+from llama_stack_api import ViolationLevel
 
 from llama_stack.models.llama.sku_types import CoreModelId
diff --git a/tests/integration/safety/test_safety.py b/tests/integration/safety/test_safety.py
index 0cc15e6dd..6a926f1d5 100644
--- a/tests/integration/safety/test_safety.py
+++ b/tests/integration/safety/test_safety.py
@@ -7,7 +7,7 @@
 import base64
 import mimetypes
 
 import pytest
-from llama_stack_api.safety import ViolationLevel
+from llama_stack_api import ViolationLevel
 
 CODE_SCANNER_ENABLED_PROVIDERS = {"ollama", "together", "fireworks"}
diff --git a/tests/integration/safety/test_vision_safety.py b/tests/integration/safety/test_vision_safety.py
index ca765cafa..b85a23263 100644
--- a/tests/integration/safety/test_vision_safety.py
+++ b/tests/integration/safety/test_vision_safety.py
@@ -9,7 +9,7 @@
 import mimetypes
 import os
 
 import pytest
-from llama_stack_api.safety import ViolationLevel
+from llama_stack_api import ViolationLevel
 
 VISION_SHIELD_ENABLED_PROVIDERS = {"together"}
diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py
index 8d514b192..1b1b6ef28 100644
--- a/tests/integration/tool_runtime/test_registration.py
+++ b/tests/integration/tool_runtime/test_registration.py
@@ -7,7 +7,7 @@
 import re
 
 import pytest
-from llama_stack_api.common.errors import ToolGroupNotFoundError
+from llama_stack_api import ToolGroupNotFoundError
 
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index 6d7069bf8..c65dfecac 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -8,8 +8,7 @@
 import time
 from io import BytesIO
 
 import pytest
-from llama_stack_api.files import ExpiresAfter
-from llama_stack_api.vector_io import Chunk
+from llama_stack_api import Chunk, ExpiresAfter
 from llama_stack_client import BadRequestError
 from openai import BadRequestError as OpenAIBadRequestError
@@ -646,7 +645,7 @@ def test_openai_vector_store_attach_file(
 ):
     """Test OpenAI vector store attach file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    from llama_stack_api.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     compat_client = compat_client_with_empty_stores
@@ -710,7 +709,7 @@ def test_openai_vector_store_attach_files_on_creation(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     compat_client = compat_client_with_empty_stores
 
-    from llama_stack_api.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     # Create some files and attach them to the vector store
     valid_file_ids = []
@@ -775,7 +774,7 @@ def test_openai_vector_store_list_files(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     compat_client = compat_client_with_empty_stores
 
-    from llama_stack_api.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -867,7 +866,7 @@ def test_openai_vector_store_retrieve_file_contents(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     compat_client = compat_client_with_empty_stores
 
-    from llama_stack_api.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -928,7 +927,7 @@ def test_openai_vector_store_delete_file(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     compat_client = compat_client_with_empty_stores
 
-    from llama_stack_api.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -994,7 +993,7 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     compat_client = compat_client_with_empty_stores
 
-    from llama_stack_api.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -1046,7 +1045,7 @@ def test_openai_vector_store_update_file(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     compat_client = compat_client_with_empty_stores
 
-    from llama_stack_api.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -1103,7 +1102,7 @@ def test_create_vector_store_files_duplicate_vector_store_name(
     This test confirms that client.vector_stores.create() creates a unique ID
     """
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    from llama_stack_api.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     compat_client = compat_client_with_empty_stores
diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py
index f1cc5e8de..acaa44bcb 100644
--- a/tests/integration/vector_io/test_vector_io.py
+++ b/tests/integration/vector_io/test_vector_io.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 import pytest
-from llama_stack_api.vector_io import Chunk
+from llama_stack_api import Chunk
 
 from ..conftest import vector_provider_wrapper
diff --git a/tests/unit/conversations/test_api_models.py b/tests/unit/conversations/test_api_models.py
index 361cbc105..f8576f076 100644
--- a/tests/unit/conversations/test_api_models.py
+++ b/tests/unit/conversations/test_api_models.py
@@ -5,11 +5,7 @@
 # the root directory of this source tree.
 
-from llama_stack_api.conversations import (
-    Conversation,
-    ConversationItem,
-    ConversationItemList,
-)
+from llama_stack_api import Conversation, ConversationItem, ConversationItemList
 
 
 def test_conversation_model_defaults():
diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py
index ea48aca7a..2f942eb9c 100644
--- a/tests/unit/conversations/test_conversations.py
+++ b/tests/unit/conversations/test_conversations.py
@@ -8,10 +8,7 @@
 import tempfile
 from pathlib import Path
 
 import pytest
-from llama_stack_api.openai_responses import (
-    OpenAIResponseInputMessageContentText,
-    OpenAIResponseMessage,
-)
+from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage
 from openai.types.conversations.conversation import Conversation as OpenAIConversation
 from openai.types.conversations.conversation_item import ConversationItem as OpenAIConversationItem
 from pydantic import TypeAdapter
diff --git a/tests/unit/core/routers/test_safety_router.py b/tests/unit/core/routers/test_safety_router.py
index 9b2c5e67a..7e465513e 100644
--- a/tests/unit/core/routers/test_safety_router.py
+++ b/tests/unit/core/routers/test_safety_router.py
@@ -6,8 +6,7 @@
 
 from unittest.mock import AsyncMock
 
-from llama_stack_api.safety import ModerationObject, ModerationObjectResults
-from llama_stack_api.shields import ListShieldsResponse, Shield
+from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield
 
 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.core.routers.safety import SafetyRouter
diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py
index 806f1bbce..071fbe6e7 100644
--- a/tests/unit/core/routers/test_vector_io.py
+++ b/tests/unit/core/routers/test_vector_io.py
@@ -7,7 +7,7 @@
 from unittest.mock import AsyncMock, Mock
 
 import pytest
-from llama_stack_api.vector_io import OpenAICreateVectorStoreRequestWithExtraBody
+from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody
 
 from llama_stack.core.routers.vector_io import VectorIORouter
diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py
index 95779f0b8..acb31e1c9 100644
--- a/tests/unit/core/test_stack_validation.py
+++ b/tests/unit/core/test_stack_validation.py
@@ -9,9 +9,7 @@
 from unittest.mock import AsyncMock
 
 import pytest
-from llama_stack_api.datatypes import Api
-from llama_stack_api.models import ListModelsResponse, Model, ModelType
-from llama_stack_api.shields import ListShieldsResponse, Shield
+from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield
 
 from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, StorageConfig, VectorStoresConfig
 from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config
diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py
index a39690653..2405d536e 100644
--- a/tests/unit/distribution/routers/test_routing_tables.py
+++ b/tests/unit/distribution/routers/test_routing_tables.py
@@ -9,14 +9,21 @@
 from unittest.mock import AsyncMock
 
 import pytest
-from llama_stack_api.common.content_types import URL
-from llama_stack_api.common.errors import ModelNotFoundError
-from llama_stack_api.common.type_system import NumberType
-from llama_stack_api.datasets import Dataset, DatasetPurpose, URIDataSource
-from llama_stack_api.datatypes import Api
-from llama_stack_api.models import Model, ModelType
-from llama_stack_api.shields import Shield
-from llama_stack_api.tools import ListToolDefsResponse, ToolDef, ToolGroup
+from llama_stack_api import (
+    URL,
+    Api,
+    Dataset,
+    DatasetPurpose,
+    ListToolDefsResponse,
+    Model,
+    ModelNotFoundError,
+    ModelType,
+    NumberType,
+    Shield,
+    ToolDef,
+    ToolGroup,
+    URIDataSource,
+)
 
 from llama_stack.core.datatypes import RegistryEntrySource
 from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable
diff --git a/tests/unit/distribution/test_api_recordings.py b/tests/unit/distribution/test_api_recordings.py
index aaa63c743..f66b57df8 100644
--- a/tests/unit/distribution/test_api_recordings.py
+++ b/tests/unit/distribution/test_api_recordings.py
@@ -11,7 +11,7 @@
 from unittest.mock import patch
 
 import pytest
 
 # Import the real Pydantic response types instead of using Mocks
-from llama_stack_api.inference import (
+from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChoice,
diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py
index 855552b9d..a27455e24 100644
--- a/tests/unit/distribution/test_distribution.py
+++ b/tests/unit/distribution/test_distribution.py
@@ -9,7 +9,7 @@
 from unittest.mock import patch
 
 import pytest
 import yaml
-from llama_stack_api.datatypes import ProviderSpec
+from llama_stack_api import ProviderSpec
 from pydantic import BaseModel, Field, ValidationError
 
 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
@@ -312,7 +312,7 @@ pip_packages:
         """Test loading an external provider from a module (success path)."""
         from types import SimpleNamespace
 
-        from llama_stack_api.datatypes import Api, ProviderSpec
+        from llama_stack_api import Api, ProviderSpec
 
         # Simulate a provider module with get_provider_spec
         fake_spec = ProviderSpec(
@@ -395,7 +395,7 @@ pip_packages:
     def test_external_provider_from_module_building(self, mock_providers):
         """Test loading an external provider from a module during build (building=True, partial spec)."""
-        from llama_stack_api.datatypes import Api
+        from llama_stack_api import Api
 
         from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
@@ -457,7 +457,7 @@ class TestGetExternalProvidersFromModule:
         """Test provider with module containing version spec (e.g., package==1.0.0)."""
         from types import SimpleNamespace
 
-        from llama_stack_api.datatypes import ProviderSpec
+        from llama_stack_api import ProviderSpec
 
         from llama_stack.core.distribution import get_external_providers_from_module
@@ -595,7 +595,7 @@ class TestGetExternalProvidersFromModule:
         """Test when get_provider_spec returns a list of specs."""
         from types import SimpleNamespace
 
-        from llama_stack_api.datatypes import ProviderSpec
+        from llama_stack_api import ProviderSpec
 
         from llama_stack.core.distribution import get_external_providers_from_module
@@ -644,7 +644,7 @@ class TestGetExternalProvidersFromModule:
         """Test that list return filters specs by provider_type."""
         from types import SimpleNamespace
 
-        from llama_stack_api.datatypes import ProviderSpec
+        from llama_stack_api import ProviderSpec
 
         from llama_stack.core.distribution import get_external_providers_from_module
@@ -693,7 +693,7 @@ class TestGetExternalProvidersFromModule:
         """Test that list return adds multiple different provider_types when config requests them."""
         from types import SimpleNamespace
 
-        from llama_stack_api.datatypes import ProviderSpec
+        from llama_stack_api import ProviderSpec
 
         from llama_stack.core.distribution import get_external_providers_from_module
@@ -833,7 +833,7 @@ class TestGetExternalProvidersFromModule:
         """Test multiple APIs with providers."""
         from types import SimpleNamespace
 
-        from llama_stack_api.datatypes import ProviderSpec
+        from llama_stack_api import ProviderSpec
 
         from llama_stack.core.distribution import get_external_providers_from_module
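The test_distribution hunks above show that the conversion reaches imports buried inside function bodies, which a naive grep for top-of-file imports would miss; walking the AST catches both. A hedged sketch of the kind of check that could enforce this boundary (illustrative only; the repository's actual enforcement mechanism may differ):

```python
"""Flag any import that peeks inside llama_stack_api's internal layout."""

import ast
import sys


def submodule_imports(source: str) -> list[tuple[int, str]]:
    """Collect (lineno, module) pairs for imports of llama_stack_api.<submodule>."""
    hits = []
    for node in ast.walk(ast.parse(source)):  # ast.walk visits nested bodies too
        if isinstance(node, ast.ImportFrom):
            if node.module and node.module.startswith("llama_stack_api."):
                hits.append((node.lineno, node.module))
        elif isinstance(node, ast.Import):
            hits.extend(
                (node.lineno, alias.name)
                for alias in node.names
                if alias.name.startswith("llama_stack_api.")
            )
    return hits


if __name__ == "__main__":
    failed = False
    for path in sys.argv[1:]:
        with open(path) as fh:
            for lineno, module in submodule_imports(fh.read()):
                print(f"{path}:{lineno}: import of {module} bypasses the public API")
                failed = True
    sys.exit(1 if failed else 0)
```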
diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py
index eae4f5741..080d1ddbe 100644
--- a/tests/unit/files/test_files.py
+++ b/tests/unit/files/test_files.py
@@ -6,9 +6,7 @@
 
 import pytest
 
-from llama_stack_api.common.errors import ResourceNotFoundError
-from llama_stack_api.common.responses import Order
-from llama_stack_api.files import OpenAIFilePurpose
+from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError
 
 from llama_stack.core.access_control.access_control import default_policy
 from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference
diff --git a/tests/unit/providers/batches/test_reference.py b/tests/unit/providers/batches/test_reference.py
index 1d423dc33..3c93a578d 100644
--- a/tests/unit/providers/batches/test_reference.py
+++ b/tests/unit/providers/batches/test_reference.py
@@ -58,8 +58,7 @@
 import json
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
-from llama_stack_api.batches import BatchObject
-from llama_stack_api.common.errors import ConflictError, ResourceNotFoundError
+from llama_stack_api import BatchObject, ConflictError, ResourceNotFoundError
 
 
 class TestReferenceBatchesImpl:
diff --git a/tests/unit/providers/batches/test_reference_idempotency.py b/tests/unit/providers/batches/test_reference_idempotency.py
index 01307e0ff..4cd5d962d 100644
--- a/tests/unit/providers/batches/test_reference_idempotency.py
+++ b/tests/unit/providers/batches/test_reference_idempotency.py
@@ -43,7 +43,7 @@ Key Behaviors Tested:
 import asyncio
 
 import pytest
-from llama_stack_api.common.errors import ConflictError
+from llama_stack_api import ConflictError
 
 
 class TestReferenceBatchesIdempotency:
diff --git a/tests/unit/providers/files/test_s3_files.py b/tests/unit/providers/files/test_s3_files.py
index fb4714914..ae63c1a78 100644
--- a/tests/unit/providers/files/test_s3_files.py
+++ b/tests/unit/providers/files/test_s3_files.py
@@ -8,8 +8,7 @@
 from unittest.mock import patch
 
 import pytest
 from botocore.exceptions import ClientError
-from llama_stack_api.common.errors import ResourceNotFoundError
-from llama_stack_api.files import OpenAIFilePurpose
+from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError
 
 
 class TestS3FilesImpl:
@@ -227,7 +226,7 @@ class TestS3FilesImpl:
 
         mock_now.return_value = 0
 
-        from llama_stack_api.files import ExpiresAfter
+        from llama_stack_api import ExpiresAfter
 
         sample_text_file.filename = "test_expired_file"
         uploaded = await s3_provider.openai_upload_file(
@@ -259,7 +258,7 @@ class TestS3FilesImpl:
     async def test_unsupported_expires_after_anchor(self, s3_provider, sample_text_file):
         """Unsupported anchor value should raise ValueError."""
 
-        from llama_stack_api.files import ExpiresAfter
+        from llama_stack_api import ExpiresAfter
 
         sample_text_file.filename = "test_unsupported_expires_after_anchor"
 
@@ -272,7 +271,7 @@ class TestS3FilesImpl:
     async def test_nonint_expires_after_seconds(self, s3_provider, sample_text_file):
         """Non-integer seconds in expires_after should raise ValueError."""
 
-        from llama_stack_api.files import ExpiresAfter
+        from llama_stack_api import ExpiresAfter
 
         sample_text_file.filename = "test_nonint_expires_after_seconds"
 
@@ -285,7 +284,7 @@ class TestS3FilesImpl:
     async def test_expires_after_seconds_out_of_bounds(self, s3_provider, sample_text_file):
         """Seconds outside allowed range should raise ValueError."""
 
-        from llama_stack_api.files import ExpiresAfter
+        from llama_stack_api import ExpiresAfter
 
         with pytest.raises(ValueError, match="greater than or equal to 3600"):
             await s3_provider.openai_upload_file(
diff --git a/tests/unit/providers/files/test_s3_files_auth.py b/tests/unit/providers/files/test_s3_files_auth.py
index 4c72e6f7d..873db4e27 100644
--- a/tests/unit/providers/files/test_s3_files_auth.py
+++ b/tests/unit/providers/files/test_s3_files_auth.py
@@ -7,8 +7,7 @@
 from unittest.mock import patch
 
 import pytest
-from llama_stack_api.common.errors import ResourceNotFoundError
-from llama_stack_api.files import OpenAIFilePurpose
+from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError
 
 from llama_stack.core.datatypes import User
 from llama_stack.providers.remote.files.s3.files import S3FilesImpl
diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py
index cc5fc6ce0..b3eecc558 100644
--- a/tests/unit/providers/inference/test_bedrock_adapter.py
+++ b/tests/unit/providers/inference/test_bedrock_adapter.py
@@ -8,7 +8,7 @@
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
-from llama_stack_api.inference import OpenAIChatCompletionRequestWithExtraBody
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
 from openai import AuthenticationError
 
 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index 505e0ac1b..e2a5455b7 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -9,8 +9,9 @@ import time
 from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch
 
 import pytest
-from llama_stack_api.datatypes import HealthStatus
-from llama_stack_api.inference import (
+from llama_stack_api import (
+    HealthStatus,
+    Model,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -20,7 +21,6 @@ from llama_stack_api.inference import (
     OpenAICompletionRequestWithExtraBody,
     ToolChoice,
 )
-from llama_stack_api.models import Model
 
 from llama_stack.core.routers.inference import InferenceRouter
 from llama_stack.core.routing_tables.models import ModelsRoutingTable
diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
index d11766c0b..36d2b86a9 100644
--- a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
+++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
@@ -7,7 +7,7 @@
 from unittest.mock import AsyncMock
 
 import pytest
-from llama_stack_api.tools import ToolDef
+from llama_stack_api import ToolDef
 
 from llama_stack.providers.inline.agents.meta_reference.responses.streaming import (
     convert_tooldef_to_chat_tool,
diff --git a/tests/unit/providers/nvidia/test_datastore.py b/tests/unit/providers/nvidia/test_datastore.py
index 0300511cb..0d9f1cc35 100644
--- a/tests/unit/providers/nvidia/test_datastore.py
+++ b/tests/unit/providers/nvidia/test_datastore.py
@@ -8,8 +8,7 @@
 import os
 from unittest.mock import patch
 
 import pytest
-from llama_stack_api.datasets import Dataset, DatasetPurpose, URIDataSource
-from llama_stack_api.resource import ResourceType
+from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource
 
 from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig
 from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter
diff --git a/tests/unit/providers/nvidia/test_eval.py b/tests/unit/providers/nvidia/test_eval.py
index 5c5314068..c41379801 100644
--- a/tests/unit/providers/nvidia/test_eval.py
+++ b/tests/unit/providers/nvidia/test_eval.py
@@ -8,11 +8,17 @@
 import os
 from unittest.mock import MagicMock, patch
 
 import pytest
-from llama_stack_api.benchmarks import Benchmark
-from llama_stack_api.common.job_types import Job, JobStatus
-from llama_stack_api.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
-from llama_stack_api.inference import TopPSamplingStrategy
-from llama_stack_api.resource import ResourceType
+from llama_stack_api import (
+    Benchmark,
+    BenchmarkConfig,
+    EvaluateResponse,
+    Job,
+    JobStatus,
+    ModelCandidate,
+    ResourceType,
+    SamplingParams,
+    TopPSamplingStrategy,
+)
 
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
diff --git a/tests/unit/providers/nvidia/test_parameters.py b/tests/unit/providers/nvidia/test_parameters.py
index 9e939f327..ba68a7abe 100644
--- a/tests/unit/providers/nvidia/test_parameters.py
+++ b/tests/unit/providers/nvidia/test_parameters.py
@@ -9,7 +9,7 @@
 import warnings
 from unittest.mock import patch
 
 import pytest
-from llama_stack_api.post_training import (
+from llama_stack_api import (
     DataConfig,
     DatasetFormat,
     EfficiencyConfig,
diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py
index a87dd74ee..8b313abcd 100644
--- a/tests/unit/providers/nvidia/test_rerank_inference.py
+++ b/tests/unit/providers/nvidia/test_rerank_inference.py
@@ -8,7 +8,7 @@
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import aiohttp
 import pytest
-from llama_stack_api.models import ModelType
+from llama_stack_api import ModelType
 
 from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig
 from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter
diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py
index aaa441e23..ea6254841 100644
--- a/tests/unit/providers/nvidia/test_safety.py
+++ b/tests/unit/providers/nvidia/test_safety.py
@@ -9,13 +9,14 @@
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
-from llama_stack_api.inference import (
+from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIUserMessageParam,
+    ResourceType,
+    RunShieldResponse,
+    Shield,
+    ViolationLevel,
 )
-from llama_stack_api.resource import ResourceType
-from llama_stack_api.safety import RunShieldResponse, ViolationLevel
-from llama_stack_api.shields import Shield
 
 from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig
 from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter
diff --git a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
index 549fb3176..4d0ce695b 100644
--- a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
+++ b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
@@ -9,7 +9,7 @@
 import warnings
 from unittest.mock import patch
 
 import pytest
-from llama_stack_api.post_training import (
+from llama_stack_api import (
     DataConfig,
     DatasetFormat,
     LoraFinetuningConfig,
diff --git a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py
index 877f2f286..df7453712 100644
--- a/tests/unit/providers/test_bedrock.py
+++ b/tests/unit/providers/test_bedrock.py
@@ -7,7 +7,7 @@
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, PropertyMock, patch
 
-from llama_stack_api.inference import OpenAIChatCompletionRequestWithExtraBody
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
 
 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py
index 3af64a4b3..b9b59bb79 100644
--- a/tests/unit/providers/utils/inference/test_openai_mixin.py
+++ b/tests/unit/providers/utils/inference/test_openai_mixin.py
@@ -10,8 +10,7 @@
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch
 
 import pytest
-from llama_stack_api.inference import Model, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
-from llama_stack_api.models import ModelType
+from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
 from pydantic import BaseModel, Field
 
 from llama_stack.core.request_headers import request_provider_data_context
diff --git a/tests/unit/providers/utils/inference/test_prompt_adapter.py b/tests/unit/providers/utils/inference/test_prompt_adapter.py
index bdc609503..a7c9289d7 100644
--- a/tests/unit/providers/utils/inference/test_prompt_adapter.py
+++ b/tests/unit/providers/utils/inference/test_prompt_adapter.py
@@ -4,10 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api.inference import (
-    OpenAIAssistantMessageParam,
-    OpenAIUserMessageParam,
-)
+from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam
 
 from llama_stack.models.llama.datatypes import RawTextItem
 from llama_stack.providers.utils.inference.prompt_adapter import (
diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py
index cc83c0037..00db5795a 100644
--- a/tests/unit/providers/utils/memory/test_vector_store.py
+++ b/tests/unit/providers/utils/memory/test_vector_store.py
@@ -7,8 +7,7 @@
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
-from llama_stack_api.common.content_types import URL, TextContentItem
-from llama_stack_api.rag_tool import RAGDocument
+from llama_stack_api import URL, RAGDocument, TextContentItem
 
 from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc
diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py
index 53d1a199d..4a85cf8b8 100644
--- a/tests/unit/providers/utils/test_model_registry.py
+++ b/tests/unit/providers/utils/test_model_registry.py
@@ -34,7 +34,7 @@
 #
 
 import pytest
-from llama_stack_api.models import Model
+from llama_stack_api import Model
 
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py
index 3bb1fac07..216e9b8ea 100644
--- a/tests/unit/providers/vector_io/conftest.py
+++ b/tests/unit/providers/vector_io/conftest.py
@@ -9,8 +9,7 @@
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import numpy as np
 import pytest
-from llama_stack_api.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
-from llama_stack_api.vector_stores import VectorStore
+from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore
 
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py
index e19bd19a9..0d5c1399f 100644
--- a/tests/unit/providers/vector_io/test_faiss.py
+++ b/tests/unit/providers/vector_io/test_faiss.py
@@ -9,10 +9,7 @@
 from unittest.mock import MagicMock, patch
 
 import numpy as np
 import pytest
-from llama_stack_api.datatypes import HealthStatus
-from llama_stack_api.files import Files
-from llama_stack_api.vector_io import Chunk, QueryChunksResponse
-from llama_stack_api.vector_stores import VectorStore
+from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore
 
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import (
diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py
index 45c237008..17a99ce1c 100644
--- a/tests/unit/providers/vector_io/test_sqlite_vec.py
+++ b/tests/unit/providers/vector_io/test_sqlite_vec.py
@@ -8,7 +8,7 @@
 import asyncio
 
 import numpy as np
 import pytest
-from llama_stack_api.vector_io import Chunk, QueryChunksResponse
+from llama_stack_api import Chunk, QueryChunksResponse
 
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
     SQLiteVecIndex,
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index ff2276990..7ba40eefb 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -10,16 +10,16 @@
 from unittest.mock import AsyncMock, patch
 
 import numpy as np
 import pytest
-from llama_stack_api.common.errors import VectorStoreNotFoundError
-from llama_stack_api.vector_io import (
+from llama_stack_api import (
     Chunk,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     OpenAICreateVectorStoreRequestWithExtraBody,
     QueryChunksResponse,
+    VectorStore,
     VectorStoreChunkingStrategyAuto,
     VectorStoreFileObject,
+    VectorStoreNotFoundError,
 )
-from llama_stack_api.vector_stores import VectorStore
 
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX
@@ -222,7 +222,7 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter):
 
 async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
     """Ensure no KeyError when document_id is missing or in different places."""
-    from llama_stack_api.vector_io import Chunk, ChunkMetadata
+    from llama_stack_api import Chunk, ChunkMetadata
 
     fake_index = AsyncMock()
     vector_io_adapter.cache["db1"] = fake_index
@@ -255,7 +255,7 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
 
 async def test_document_id_with_invalid_type_raises_error():
     """Ensure TypeError is raised when document_id is not a string."""
-    from llama_stack_api.vector_io import Chunk
+    from llama_stack_api import Chunk
 
     # Integer document_id should raise TypeError
     from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
diff --git a/tests/unit/providers/vector_io/test_vector_utils.py b/tests/unit/providers/vector_io/test_vector_utils.py
index 11e2302b9..678b76fbd 100644
--- a/tests/unit/providers/vector_io/test_vector_utils.py
+++ b/tests/unit/providers/vector_io/test_vector_utils.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api.vector_io import Chunk, ChunkMetadata
+from llama_stack_api import Chunk, ChunkMetadata
 
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py
index c538fbfbe..e3f5e46d7 100644
--- a/tests/unit/rag/test_rag_query.py
+++ b/tests/unit/rag/test_rag_query.py
@@ -7,12 +7,7 @@
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
-from llama_stack_api.rag_tool import RAGQueryConfig
-from llama_stack_api.vector_io import (
-    Chunk,
-    ChunkMetadata,
-    QueryChunksResponse,
-)
+from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig
 
 from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index 9954d6049..23c12dcab 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -12,12 +12,7 @@
 from unittest.mock import AsyncMock, MagicMock
 
 import numpy as np
 import pytest
-from llama_stack_api.inference import (
-    OpenAIEmbeddingData,
-    OpenAIEmbeddingsRequestWithExtraBody,
-)
-from llama_stack_api.rag_tool import RAGDocument
-from llama_stack_api.vector_io import Chunk
+from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument
 
 from llama_stack.providers.utils.memory.vector_store import (
     URL,
diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py
index fed0a1710..01f486ab2 100644
--- a/tests/unit/registry/test_registry.py
+++ b/tests/unit/registry/test_registry.py
@@ -6,8 +6,7 @@
 
 import pytest
 
-from llama_stack_api.inference import Model
-from llama_stack_api.vector_stores import VectorStore
+from llama_stack_api import Model, VectorStore
 
 from llama_stack.core.datatypes import VectorStoreWithOwner
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
@@ -304,7 +303,7 @@ async def test_double_registration_different_objects(disk_dist_registry):
 
 async def test_double_registration_with_cache(cached_disk_dist_registry):
     """Test double registration behavior with caching enabled."""
-    from llama_stack_api.models import ModelType
+    from llama_stack_api import ModelType
 
     from llama_stack.core.datatypes import ModelWithOwner
diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py
index 40d3d5deb..2827f60b9 100644
--- a/tests/unit/registry/test_registry_acl.py
+++ b/tests/unit/registry/test_registry_acl.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
-from llama_stack_api.models import ModelType
+from llama_stack_api import ModelType
 
 from llama_stack.core.datatypes import ModelWithOwner, User
 from llama_stack.core.store.registry import CachedDiskDistributionRegistry
diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py
index ba161ac22..1df933d4d 100644
--- a/tests/unit/server/test_access_control.py
+++ b/tests/unit/server/test_access_control.py
@@ -8,8 +8,7 @@
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
 import yaml
-from llama_stack_api.datatypes import Api
-from llama_stack_api.models import ModelType
+from llama_stack_api import Api, ModelType
 from pydantic import TypeAdapter, ValidationError
 
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py
index 4dafc9878..57a552514 100644
--- a/tests/unit/server/test_auth.py
+++ b/tests/unit/server/test_auth.py
@@ -144,7 +144,7 @@ def middleware_with_mocks(mock_auth_endpoint):
     middleware = AuthenticationMiddleware(mock_app, auth_config, {})
 
     # Mock the route_impls to simulate finding routes with required scopes
-    from llama_stack_api.schema_utils import WebMethod
+    from llama_stack_api import WebMethod
 
     routes = {
         ("POST", "/test/scoped"): WebMethod(route="/test/scoped", method="POST", required_scope="test.read"),
diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py
index 25609ff36..071178f96 100644
--- a/tests/unit/server/test_resolver.py
+++ b/tests/unit/server/test_resolver.py
@@ -9,8 +9,7 @@
 import sys
 from typing import Any, Protocol
 from unittest.mock import AsyncMock, MagicMock
 
-from llama_stack_api.datatypes import InlineProviderSpec, ProviderSpec
-from llama_stack_api.inference import Inference
+from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec
 from pydantic import BaseModel, Field
 
 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py
index 91cc8bbfb..fdaf9022b 100644
--- a/tests/unit/server/test_sse.py
+++ b/tests/unit/server/test_sse.py
@@ -9,7 +9,7 @@
 import logging  # allow-direct-logging
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
-from llama_stack_api.common.responses import PaginatedResponse
+from llama_stack_api import PaginatedResponse
 
 from llama_stack.core.server.server import create_dynamic_typed_route, create_sse_event, sse_generator
diff --git a/tests/unit/tools/test_tools_json_schema.py b/tests/unit/tools/test_tools_json_schema.py
index 366a06c17..79e0b6e28 100644
--- a/tests/unit/tools/test_tools_json_schema.py
+++ b/tests/unit/tools/test_tools_json_schema.py
@@ -9,7 +9,7 @@
 Unit tests for JSON Schema-based tool definitions.
 Tests the new input_schema and output_schema fields.
""" -from llama_stack_api.tools import ToolDef +from llama_stack_api import ToolDef from pydantic import ValidationError from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py index f3b0ab6d0..4da20b125 100644 --- a/tests/unit/utils/inference/test_inference_store.py +++ b/tests/unit/utils/inference/test_inference_store.py @@ -7,7 +7,7 @@ import time import pytest -from llama_stack_api.inference import ( +from llama_stack_api import ( OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChoice, diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 26b458a57..1119a93d8 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -9,12 +9,7 @@ from tempfile import TemporaryDirectory from uuid import uuid4 import pytest -from llama_stack_api.agents import Order -from llama_stack_api.inference import OpenAIMessageParam, OpenAIUserMessageParam -from llama_stack_api.openai_responses import ( - OpenAIResponseInput, - OpenAIResponseObject, -) +from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.responses.responses_store import ResponsesStore @@ -46,7 +41,7 @@ def create_test_response_object( def create_test_response_input(content: str, input_id: str) -> OpenAIResponseInput: """Helper to create a test response input.""" - from llama_stack_api.openai_responses import OpenAIResponseMessage + from llama_stack_api import OpenAIResponseMessage return OpenAIResponseMessage( id=input_id,