diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml
index b0f2c6e69..b58f4eb69 100644
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@@ -31,7 +31,7 @@ jobs:
           version: 0.7.6
 
       - name: Build Llama Stack API package
-        working-directory: src/llama-stack-api
+        working-directory: src/llama_stack_api
         run: uv build
 
       - name: Build Llama Stack package
@@ -39,7 +39,7 @@ jobs:
 
       - name: Install Llama Stack package (with api stubs from local build)
        run: |
-          uv pip install --find-links src/llama-stack-api/dist dist/*.whl
+          uv pip install --find-links src/llama_stack_api/dist dist/*.whl
 
      - name: Verify Llama Stack package
        run: |
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6f4dd6a0e..6e32d16b7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,7 @@ repos:
   hooks:
     - id: ruff
       args: [ --fix ]
-      exclude: ^(src/llama_stack/strong_typing/.*|src/llama-stack-api/llama_stack_api/strong_typing/.*)$
+      exclude: ^(src/llama_stack/strong_typing/.*|src/llama_stack_api/strong_typing/.*)$
     - id: ruff-format
 
 - repo: https://github.com/adamchainz/blacken-docs
diff --git a/pyproject.toml b/pyproject.toml
index d287b4be7..34728d6ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -181,7 +181,7 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p
 
 [tool.setuptools.packages.find]
 where = ["src"]
-include = ["llama_stack", "llama_stack.*", "llama-stack-api", "llama-stack-api.*"]
+include = ["llama_stack", "llama_stack.*", "llama_stack_api", "llama_stack_api.*"]
 
 [[tool.uv.index]]
 name = "pytorch-cpu"
@@ -191,7 +191,7 @@ explicit = true
 [tool.uv.sources]
 torch = [{ index = "pytorch-cpu" }]
 torchvision = [{ index = "pytorch-cpu" }]
-llama-stack-api = [{ path = "src/llama-stack-api", editable = true }]
+llama-stack-api = [{ path = "src/llama_stack_api", editable = true }]
 
 [tool.ruff]
 line-length = 120
@@ -258,7 +258,7 @@ unfixable = [
 ]
 # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API
 
 [tool.mypy]
-mypy_path = ["src", "src/llama-stack-api"]
+mypy_path = ["src"]
 packages = ["llama_stack", "llama_stack_api"]
 plugins = ['pydantic.mypy']
 disable_error_code = []
@@ -281,14 +281,12 @@ exclude = [
     "^src/llama_stack/core/store/registry\\.py$",
     "^src/llama_stack/core/utils/exec\\.py$",
     "^src/llama_stack/core/utils/prompt_for_config\\.py$",
-    # Moved to llama-stack-api but still excluded
     "^src/llama_stack/models/llama/llama3/interface\\.py$",
     "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
     "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
     "^src/llama_stack/models/llama/llama3/generation\\.py$",
     "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
     "^src/llama_stack/models/llama/llama4/",
-    "^src/llama-stack-api/llama_stack_api/core/telemetry/telemetry\\.py$",
     "^src/llama_stack/providers/inline/agents/meta_reference/",
     "^src/llama_stack/providers/inline/datasetio/localfs/",
     "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
@@ -342,9 +340,7 @@ exclude = [
     "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
     "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
     "^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
-    "^src/llama-stack-api/llama_stack_api/core/telemetry/trace_protocol\\.py$",
-    "^src/llama-stack-api/llama_stack_api/core/telemetry/tracing\\.py$",
-    "^src/llama-stack-api/llama_stack_api/strong_typing/auxiliary\\.py$",
+    "^src/llama_stack_api/strong_typing/auxiliary\\.py$",
     "^src/llama_stack/distributions/template\\.py$",
 ]
diff --git a/scripts/generate_prompt_format.py b/scripts/generate_prompt_format.py
index 8099a3f0d..381bbc6a7 100755
--- a/scripts/generate_prompt_format.py
+++ b/scripts/generate_prompt_format.py
@@ -14,11 +14,11 @@
 import os
 from pathlib import Path
 
 import fire
-from llama_stack_api import ModelNotFoundError
 
 from llama_stack.models.llama.llama3.generation import Llama3
 from llama_stack.models.llama.llama4.generation import Llama4
 from llama_stack.models.llama.sku_list import resolve_model
+from llama_stack_api import ModelNotFoundError
 
 THIS_DIR = Path(__file__).parent.resolve()
diff --git a/src/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py
index 50fe394fc..82bef1a4f 100644
--- a/src/llama_stack/cli/stack/_list_deps.py
+++ b/src/llama_stack/cli/stack/_list_deps.py
@@ -9,7 +9,6 @@ import sys
 from pathlib import Path
 
 import yaml
-from llama_stack_api import Api
 from termcolor import cprint
 
 from llama_stack.cli.stack.utils import ImageType
@@ -22,6 +21,7 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.stack import replace_env_vars
 from llama_stack.log import get_logger
+from llama_stack_api import Api
 
 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py
index 0a4e22b09..d49b142e0 100644
--- a/src/llama_stack/cli/stack/utils.py
+++ b/src/llama_stack/cli/stack/utils.py
@@ -11,7 +11,6 @@ from functools import lru_cache
 from pathlib import Path
 
 import yaml
-from llama_stack_api import Api
 from termcolor import cprint
 
 from llama_stack.core.datatypes import (
@@ -33,6 +32,7 @@ from llama_stack.core.storage.datatypes import (
 from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.image_types import LlamaStackImageType
+from llama_stack_api import Api
 
 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py
index 27ded7ede..630b2a47f 100644
--- a/src/llama_stack/core/build.py
+++ b/src/llama_stack/core/build.py
@@ -6,7 +6,6 @@
 
 import sys
 
-from llama_stack_api import Api
 from pydantic import BaseModel
 from termcolor import cprint
 
@@ -14,6 +13,7 @@ from llama_stack.core.datatypes import BuildConfig
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.distributions.template import DistributionTemplate
 from llama_stack.log import get_logger
+from llama_stack_api import Api
 
 log = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/client.py b/src/llama_stack/core/client.py
index 41acacdb5..ba935a35e 100644
--- a/src/llama_stack/core/client.py
+++ b/src/llama_stack/core/client.py
@@ -12,10 +12,11 @@ from enum import Enum
 from typing import Any, Union, get_args, get_origin
 
 import httpx
-from llama_stack_api import RemoteProviderConfig
 from pydantic import BaseModel, parse_obj_as
 from termcolor import cprint
 
+from llama_stack_api import RemoteProviderConfig
+
 _CLIENT_CLASSES = {}
diff --git a/src/llama_stack/core/configure.py b/src/llama_stack/core/configure.py
index bdb3b9734..d738b8a61 100644
--- a/src/llama_stack/core/configure.py
+++ b/src/llama_stack/core/configure.py
@@ -6,8 +6,6 @@
 import textwrap
 from typing import Any
 
-from llama_stack_api import Api, ProviderSpec
-
 from llama_stack.core.datatypes import (
     LLAMA_STACK_RUN_CONFIG_VERSION,
     DistributionSpec,
@@ -22,6 +20,7 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.prompt_for_config import prompt_for_config
 from llama_stack.log import get_logger
+from llama_stack_api import Api, ProviderSpec
 
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py
index b94cd4fdd..4cf5a82ee 100644
--- a/src/llama_stack/core/conversations/conversations.py
+++ b/src/llama_stack/core/conversations/conversations.py
@@ -8,6 +8,13 @@ import secrets
 import time
 from typing import Any, Literal
 
+from pydantic import BaseModel, TypeAdapter
+
+from llama_stack.core.datatypes import AccessRule, StackRunConfig
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     Conversation,
     ConversationDeletedResource,
@@ -18,13 +25,6 @@ from llama_stack_api import (
     Conversations,
     Metadata,
 )
-from pydantic import BaseModel, TypeAdapter
-
-from llama_stack.core.datatypes import AccessRule, StackRunConfig
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 
 logger = get_logger(name=__name__, category="openai_conversations")
diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py
index 4231363b6..1e29690ff 100644
--- a/src/llama_stack/core/datatypes.py
+++ b/src/llama_stack/core/datatypes.py
@@ -9,6 +9,15 @@ from pathlib import Path
 from typing import Annotated, Any, Literal, Self
 from urllib.parse import urlparse
 
+from pydantic import BaseModel, Field, field_validator, model_validator
+
+from llama_stack.core.access_control.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import (
+    KVStoreReference,
+    StorageBackendType,
+    StorageConfig,
+)
+from llama_stack.log import LoggingConfig
 from llama_stack_api import (
     Api,
     Benchmark,
@@ -35,15 +44,6 @@ from llama_stack_api import (
     VectorStore,
     VectorStoreInput,
 )
-from pydantic import BaseModel, Field, field_validator, model_validator
-
-from llama_stack.core.access_control.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import (
-    KVStoreReference,
-    StorageBackendType,
-    StorageConfig,
-)
-from llama_stack.log import LoggingConfig
 
 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
 LLAMA_STACK_RUN_CONFIG_VERSION = 2
diff --git a/src/llama_stack/core/distribution.py b/src/llama_stack/core/distribution.py
index 162f9f2b0..658c75ef2 100644
--- a/src/llama_stack/core/distribution.py
+++ b/src/llama_stack/core/distribution.py
@@ -10,17 +10,17 @@ import os
 from typing import Any
 
 import yaml
+from pydantic import BaseModel
+
+from llama_stack.core.datatypes import BuildConfig, DistributionSpec
+from llama_stack.core.external import load_external_apis
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
     RemoteProviderSpec,
 )
-from pydantic import BaseModel
-
-from llama_stack.core.datatypes import BuildConfig, DistributionSpec
-from llama_stack.core.external import load_external_apis
-from llama_stack.log import get_logger
 
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/external.py b/src/llama_stack/core/external.py
index ce0c7eb72..d1a2d6e42 100644
--- a/src/llama_stack/core/external.py
+++ b/src/llama_stack/core/external.py
@@ -6,10 +6,10 @@
 
 import yaml
 
-from llama_stack_api import Api, ExternalApiSpec
 from llama_stack.core.datatypes import BuildConfig, StackRunConfig
 from llama_stack.log import get_logger
+from llama_stack_api import Api, ExternalApiSpec
 
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py
index 53ddd3475..272c9d1bc 100644
--- a/src/llama_stack/core/inspect.py
+++ b/src/llama_stack/core/inspect.py
@@ -6,6 +6,11 @@
 
 from importlib.metadata import version
 
+from pydantic import BaseModel
+
+from llama_stack.core.datatypes import StackRunConfig
+from llama_stack.core.external import load_external_apis
+from llama_stack.core.server.routes import get_all_api_routes
 from llama_stack_api import (
     HealthInfo,
     HealthStatus,
@@ -14,11 +19,6 @@ from llama_stack_api import (
     RouteInfo,
     VersionInfo,
 )
-from pydantic import BaseModel
-
-from llama_stack.core.datatypes import StackRunConfig
-from llama_stack.core.external import load_external_apis
-from llama_stack.core.server.routes import get_all_api_routes
 
 
 class DistributionInspectConfig(BaseModel):
diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py
index 959284720..2a224d915 100644
--- a/src/llama_stack/core/library_client.py
+++ b/src/llama_stack/core/library_client.py
@@ -18,6 +18,7 @@ from typing import Any, TypeVar, Union, get_args, get_origin
 import httpx
 import yaml
 from fastapi import Response as FastAPIResponse
+
 from llama_stack_api import is_unwrapped_body_param
 
 try:
diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py
index d9532b978..9f532c1cd 100644
--- a/src/llama_stack/core/prompts/prompts.py
+++ b/src/llama_stack/core/prompts/prompts.py
@@ -7,11 +7,11 @@
 import json
 from typing import Any
 
-from llama_stack_api import ListPromptsResponse, Prompt, Prompts
 from pydantic import BaseModel
 
 from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
+from llama_stack_api import ListPromptsResponse, Prompt, Prompts
 
 
 class PromptServiceConfig(BaseModel):
diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py
index 7337d9e35..e3fe3c7b3 100644
--- a/src/llama_stack/core/providers.py
+++ b/src/llama_stack/core/providers.py
@@ -7,10 +7,10 @@
 import asyncio
 from typing import Any
 
-from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers
 from pydantic import BaseModel
 
 from llama_stack.log import get_logger
+from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers
 
 from .datatypes import StackRunConfig
 from .utils.config import redact_sensitive_fields
diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py
index ca154fbc6..6bc32c2d0 100644
--- a/src/llama_stack/core/resolver.py
+++ b/src/llama_stack/core/resolver.py
@@ -8,6 +8,19 @@ import importlib.metadata
 import inspect
 from typing import Any
 
+from llama_stack.core.client import get_client_impl
+from llama_stack.core.datatypes import (
+    AccessRule,
+    AutoRoutedProviderSpec,
+    Provider,
+    RoutingTableProviderSpec,
+    StackRunConfig,
+)
+from llama_stack.core.distribution import builtin_automatically_routed_apis
+from llama_stack.core.external import load_external_apis
+from llama_stack.core.store import DistributionRegistry
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.log import get_logger
 from llama_stack_api import (
     LLAMA_STACK_API_V1ALPHA,
     Agents,
@@ -48,20 +61,6 @@ from llama_stack_api import (
     Providers as ProvidersAPI,
 )
 
-from llama_stack.core.client import get_client_impl
-from llama_stack.core.datatypes import (
-    AccessRule,
-    AutoRoutedProviderSpec,
-    Provider,
-    RoutingTableProviderSpec,
-    StackRunConfig,
-)
-from llama_stack.core.distribution import builtin_automatically_routed_apis
-from llama_stack.core.external import load_external_apis
-from llama_stack.core.store import DistributionRegistry
-from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py
index c2d051422..289755bcb 100644
--- a/src/llama_stack/core/routers/__init__.py
+++ b/src/llama_stack/core/routers/__init__.py
@@ -6,8 +6,6 @@
 
 from typing import Any
 
-from llama_stack_api import Api, RoutingTable
-
 from llama_stack.core.datatypes import (
     AccessRule,
     RoutedProtocol,
@@ -15,6 +13,7 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.stack import StackRunConfig
 from llama_stack.core.store import DistributionRegistry
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
+from llama_stack_api import Api, RoutingTable
 
 
 async def get_routing_table_impl(
diff --git a/src/llama_stack/core/routers/datasets.py b/src/llama_stack/core/routers/datasets.py
index dcf247874..b6a5f3b96 100644
--- a/src/llama_stack/core/routers/datasets.py
+++ b/src/llama_stack/core/routers/datasets.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable
-
 from llama_stack.log import get_logger
+from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable
 
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/eval_scoring.py b/src/llama_stack/core/routers/eval_scoring.py
index cbbbf5cc5..4d7269180 100644
--- a/src/llama_stack/core/routers/eval_scoring.py
+++ b/src/llama_stack/core/routers/eval_scoring.py
@@ -6,6 +6,7 @@
 
 from typing import Any
 
+from llama_stack.log import get_logger
 from llama_stack_api import (
     BenchmarkConfig,
     Eval,
@@ -18,8 +19,6 @@ from llama_stack_api import (
     ScoringFnParams,
 )
 
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py
index a538ab02e..acfe20399 100644
--- a/src/llama_stack/core/routers/inference.py
+++ b/src/llama_stack/core/routers/inference.py
@@ -11,6 +11,16 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 
 from fastapi import Body
+from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
+from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
+from pydantic import TypeAdapter
+
+from llama_stack.core.telemetry.telemetry import MetricEvent
+from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
+from llama_stack.log import get_logger
+from llama_stack.models.llama.llama3.chat_format import ChatFormat
+from llama_stack.models.llama.llama3.tokenizer import Tokenizer
+from llama_stack.providers.utils.inference.inference_store import InferenceStore
 from llama_stack_api import (
     HealthResponse,
     HealthStatus,
@@ -39,16 +49,6 @@ from llama_stack_api import (
     RerankResponse,
     RoutingTable,
 )
-from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
-from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
-from pydantic import TypeAdapter
-
-from llama_stack.core.telemetry.telemetry import MetricEvent
-from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
-from llama_stack.log import get_logger
-from llama_stack.models.llama.llama3.chat_format import ChatFormat
-from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.providers.utils.inference.inference_store import InferenceStore
 
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py
index f85bbb767..2bc99f14f 100644
--- a/src/llama_stack/core/routers/safety.py
+++ b/src/llama_stack/core/routers/safety.py
@@ -6,10 +6,9 @@
 
 from typing import Any
 
-from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
-
 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.log import get_logger
+from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
 
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py
index 984a8e2a7..eccc05732 100644
--- a/src/llama_stack/core/routers/tool_runtime.py
+++ b/src/llama_stack/core/routers/tool_runtime.py
@@ -6,14 +6,13 @@
 
 from typing import Any
 
+from llama_stack.log import get_logger
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
     ToolRuntime,
 )
 
-from llama_stack.log import get_logger
-
 from ..routing_tables.toolgroups import ToolGroupsRoutingTable
 
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py
index bfd090e32..02e56ed7e 100644
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@@ -9,6 +9,9 @@ import uuid
 from typing import Annotated, Any
 
 from fastapi import Body
+
+from llama_stack.core.datatypes import VectorStoresConfig
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Chunk,
     HealthResponse,
@@ -36,9 +39,6 @@ from llama_stack_api import (
     VectorStoreSearchResponsePage,
 )
 
-from llama_stack.core.datatypes import VectorStoresConfig
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py
index 66830bc41..9037ffe8b 100644
--- a/src/llama_stack/core/routing_tables/benchmarks.py
+++ b/src/llama_stack/core/routing_tables/benchmarks.py
@@ -6,12 +6,11 @@
 
 from typing import Any
 
-from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
-
 from llama_stack.core.datatypes import (
     BenchmarkWithOwner,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
 
 from .common import CommonRoutingTableImpl
diff --git a/src/llama_stack/core/routing_tables/common.py b/src/llama_stack/core/routing_tables/common.py
index cfbafc9a8..a9e3ff95f 100644
--- a/src/llama_stack/core/routing_tables/common.py
+++ b/src/llama_stack/core/routing_tables/common.py
@@ -6,8 +6,6 @@
 
 from typing import Any
 
-from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable
-
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
 from llama_stack.core.access_control.datatypes import Action
 from llama_stack.core.datatypes import (
@@ -20,6 +18,7 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.request_headers import get_authenticated_user
 from llama_stack.core.store import DistributionRegistry
 from llama_stack.log import get_logger
+from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py
index c49c9769b..62fd07b13 100644
--- a/src/llama_stack/core/routing_tables/datasets.py
+++ b/src/llama_stack/core/routing_tables/datasets.py
@@ -7,6 +7,10 @@
 import uuid
 from typing import Any
 
+from llama_stack.core.datatypes import (
+    DatasetWithOwner,
+)
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Dataset,
     DatasetNotFoundError,
@@ -20,11 +24,6 @@ from llama_stack_api import (
     URIDataSource,
 )
 
-from llama_stack.core.datatypes import (
-    DatasetWithOwner,
-)
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py
index e1210a139..1facbb27b 100644
--- a/src/llama_stack/core/routing_tables/models.py
+++ b/src/llama_stack/core/routing_tables/models.py
@@ -7,6 +7,13 @@
 import time
 from typing import Any
 
+from llama_stack.core.datatypes import (
+    ModelWithOwner,
+    RegistryEntrySource,
+)
+from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ListModelsResponse,
     Model,
@@ -17,14 +24,6 @@ from llama_stack_api import (
     OpenAIModel,
 )
 
-from llama_stack.core.datatypes import (
-    ModelWithOwner,
-    RegistryEntrySource,
-)
-from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
-from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl, lookup_model
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/scoring_functions.py b/src/llama_stack/core/routing_tables/scoring_functions.py
index 66165ac2f..65ed26b85 100644
--- a/src/llama_stack/core/routing_tables/scoring_functions.py
+++ b/src/llama_stack/core/routing_tables/scoring_functions.py
@@ -4,6 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import (
+    ScoringFnWithOwner,
+)
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ListScoringFunctionsResponse,
     ParamType,
@@ -13,11 +17,6 @@ from llama_stack_api import (
     ScoringFunctions,
 )
 
-from llama_stack.core.datatypes import (
-    ScoringFnWithOwner,
-)
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/shields.py b/src/llama_stack/core/routing_tables/shields.py
index 0f981c49d..97b2efb96 100644
--- a/src/llama_stack/core/routing_tables/shields.py
+++ b/src/llama_stack/core/routing_tables/shields.py
@@ -6,12 +6,11 @@
 
 from typing import Any
 
-from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
-
 from llama_stack.core.datatypes import (
     ShieldWithOwner,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
 
 from .common import CommonRoutingTableImpl
diff --git a/src/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py
index a552cb96e..7e2068608 100644
--- a/src/llama_stack/core/routing_tables/toolgroups.py
+++ b/src/llama_stack/core/routing_tables/toolgroups.py
@@ -6,6 +6,8 @@
 
 from typing import Any
 
+from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
+from llama_stack.log import get_logger
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
@@ -16,9 +18,6 @@ from llama_stack_api import (
     ToolGroups,
 )
 
-from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py
index f95463b3c..93c119542 100644
--- a/src/llama_stack/core/routing_tables/vector_stores.py
+++ b/src/llama_stack/core/routing_tables/vector_stores.py
@@ -6,6 +6,11 @@
 
 from typing import Any
 
+from llama_stack.core.datatypes import (
+    VectorStoreWithOwner,
+)
+from llama_stack.log import get_logger
+
 # Removed VectorStores import to avoid exposing public API
 from llama_stack_api import (
     ModelNotFoundError,
@@ -23,11 +28,6 @@ from llama_stack_api import (
     VectorStoreSearchResponsePage,
 )
 
-from llama_stack.core.datatypes import (
-    VectorStoreWithOwner,
-)
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl, lookup_model
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/server/auth_providers.py b/src/llama_stack/core/server/auth_providers.py
index a7f5d7916..66942dd39 100644
--- a/src/llama_stack/core/server/auth_providers.py
+++ b/src/llama_stack/core/server/auth_providers.py
@@ -11,7 +11,6 @@ from urllib.parse import parse_qs, urljoin, urlparse
 
 import httpx
 import jwt
-from llama_stack_api import TokenValidationError
 from pydantic import BaseModel, Field
 
 from llama_stack.core.datatypes import (
@@ -23,6 +22,7 @@ from llama_stack.core.datatypes import (
     User,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import TokenValidationError
 
 logger = get_logger(name=__name__, category="core::auth")
diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py
index e7a84937d..af5002565 100644
--- a/src/llama_stack/core/server/routes.py
+++ b/src/llama_stack/core/server/routes.py
@@ -10,10 +10,10 @@ from collections.abc import Callable
 from typing import Any
 
 from aiohttp import hdrs
-from llama_stack_api import Api, ExternalApiSpec, WebMethod
 from starlette.routing import Route
 
 from llama_stack.core.resolver import api_protocol_map
+from llama_stack_api import Api, ExternalApiSpec, WebMethod
 
 EndpointFunc = Callable[..., Any]
 PathParams = dict[str, str]
diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py
index 8116348ec..0d3513980 100644
--- a/src/llama_stack/core/server/server.py
+++ b/src/llama_stack/core/server/server.py
@@ -28,7 +28,6 @@ from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
-from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
 
@@ -57,6 +56,7 @@ from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import LoggingConfig, get_logger, setup_logging
+from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 
 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware
diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py
index 674c35f31..00d990cb1 100644
--- a/src/llama_stack/core/stack.py
+++ b/src/llama_stack/core/stack.py
@@ -12,6 +12,28 @@ import tempfile
 from typing import Any
 
 import yaml
+
+from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
+from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
+from llama_stack.core.distribution import get_provider_registry
+from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
+from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
+from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
+from llama_stack.core.resolver import ProviderRegistry, resolve_impls
+from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    ServerStoresConfig,
+    SqliteKVStoreConfig,
+    SqliteSqlStoreConfig,
+    SqlStoreReference,
+    StorageBackendConfig,
+    StorageConfig,
+)
+from llama_stack.core.store.registry import create_dist_registry
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Agents,
     Api,
@@ -37,28 +59,6 @@ from llama_stack_api import (
     VectorIO,
 )
 
-from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
-from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
-from llama_stack.core.distribution import get_provider_registry
-from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
-from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
-from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
-from llama_stack.core.resolver import ProviderRegistry, resolve_impls
-from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
-from llama_stack.core.storage.datatypes import (
-    InferenceStoreReference,
-    KVStoreReference,
-    ServerStoresConfig,
-    SqliteKVStoreConfig,
-    SqliteSqlStoreConfig,
-    SqlStoreReference,
-    StorageBackendConfig,
-    StorageConfig,
-)
-from llama_stack.core.store.registry import create_dist_registry
-from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py
index 1a56277ea..5268fa641 100644
--- a/src/llama_stack/core/telemetry/telemetry.py
+++ b/src/llama_stack/core/telemetry/telemetry.py
@@ -16,7 +16,6 @@ from typing import (
     cast,
 )
 
-from llama_stack_api import json_schema_type, register_schema
 from opentelemetry import metrics, trace
 from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
@@ -29,6 +28,7 @@ from pydantic import BaseModel, Field
 
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import Primitive
+from llama_stack_api import json_schema_type, register_schema
 
 ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
diff --git a/src/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py
index fd76e3ccb..52a07b7f1 100644
--- a/src/llama_stack/distributions/dell/dell.py
+++ b/src/llama_stack/distributions/dell/dell.py
@@ -4,8 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api import ModelType
-
 from llama_stack.core.datatypes import (
     BuildProvider,
     ModelInput,
@@ -18,6 +16,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.vector_io.chroma import ChromaVectorIOConfig
+from llama_stack_api import ModelType
 
 
 def get_distribution_template() -> DistributionTemplate:
diff --git a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
index 67af0e92a..a515794d5 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
+++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
@@ -6,8 +6,6 @@
 
 from pathlib import Path
 
-from llama_stack_api import ModelType
-
 from llama_stack.core.datatypes import (
     BuildProvider,
     ModelInput,
@@ -23,6 +21,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
+from llama_stack_api import ModelType
 
 
 def get_distribution_template() -> DistributionTemplate:
diff --git a/src/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
index 59deca6d0..1f4dbf2c2 100644
--- a/src/llama_stack/distributions/open-benchmark/open_benchmark.py
+++ b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
@@ -5,8 +5,6 @@
 # the root directory of this source tree.
 
-from llama_stack_api import DatasetPurpose, ModelType, URIDataSource
-
 from llama_stack.core.datatypes import (
     BenchmarkInput,
     BuildProvider,
@@ -34,6 +32,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
     PGVectorVectorIOConfig,
 )
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
+from llama_stack_api import DatasetPurpose, ModelType, URIDataSource
 
 
 def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py
index 1a8126290..4c21a8c99 100644
--- a/src/llama_stack/distributions/starter/starter.py
+++ b/src/llama_stack/distributions/starter/starter.py
@@ -7,8 +7,6 @@
 
 from typing import Any
 
-from llama_stack_api import RemoteProviderSpec
-
 from llama_stack.core.datatypes import (
     BuildProvider,
     Provider,
@@ -39,6 +37,7 @@ from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOC
 from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
 from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
+from llama_stack_api import RemoteProviderSpec
 
 
 def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]:
diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py
index faf5fb085..5755a26de 100644
--- a/src/llama_stack/distributions/template.py
+++ b/src/llama_stack/distributions/template.py
@@ -10,7 +10,6 @@ from typing import Any, Literal
 import jinja2
 import rich
 import yaml
-from llama_stack_api import DatasetPurpose, ModelType
 from pydantic import BaseModel, Field
 
 from llama_stack.core.datatypes import (
@@ -43,6 +42,7 @@ from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages
 from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
+from llama_stack_api import DatasetPurpose, ModelType
 
 
 def filter_empty_values(obj: Any) -> Any:
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
index 025fcc676..347f6fdb1 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -5,6 +5,10 @@
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
+from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 from llama_stack_api import (
     Agents,
     Conversations,
@@ -25,11 +29,6 @@ from llama_stack_api import (
     VectorIO,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
-from llama_stack.providers.utils.responses.responses_store import ResponsesStore
-
 from .config import MetaReferenceAgentsImplConfig
 from .responses.openai_responses import OpenAIResponsesImpl
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index 347eeef78..3f88b1562 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -8,6 +8,13 @@ import time
 import uuid
 from collections.abc import AsyncIterator
 
+from pydantic import BaseModel, TypeAdapter
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.responses.responses_store import (
+    ResponsesStore,
+    _OpenAIResponseObjectWithInputAndMessages,
+)
 from llama_stack_api import (
     ConversationItem,
     Conversations,
@@ -34,13 +41,6 @@ from llama_stack_api import (
     ToolRuntime,
     VectorIO,
 )
-from pydantic import BaseModel, TypeAdapter
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.responses.responses_store import (
-    ResponsesStore,
-    _OpenAIResponseObjectWithInputAndMessages,
-)
 
 from .streaming import StreamingResponseOrchestrator
 from .tool_executor import ToolExecutor
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 6a791e92d..ea4486b62 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -8,6 +8,9 @@ import uuid
 from collections.abc import AsyncIterator
 from typing import Any
 
+from llama_stack.core.telemetry import tracing
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack_api import (
     AllowedToolsFilter,
     ApprovalFilter,
@@ -65,10 +68,6 @@ from llama_stack_api import (
     WebSearchToolTypes,
 )
 
-from llama_stack.core.telemetry import tracing
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-
 from .types import ChatCompletionContext, ChatCompletionResult
 from .utils import (
     convert_chat_choice_to_response_message,
@@ -1022,11 +1021,11 @@ class StreamingResponseOrchestrator:
         self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput]
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         """Process all tools and emit appropriate streaming events."""
-        from llama_stack_api import ToolDef
         from openai.types.chat import ChatCompletionToolParam
 
         from llama_stack.models.llama.datatypes import ToolDefinition
         from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
+        from llama_stack_api import ToolDef
 
         def make_openai_tool(tool_name: str, tool: ToolDef) -> ChatCompletionToolParam:
             tool_def = ToolDefinition(
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
index 38fb2a94f..616ec2477 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
@@ -9,6 +9,8 @@ import json
 from collections.abc import AsyncIterator
 from typing import Any
 
+from llama_stack.core.telemetry import tracing
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ImageContentItem,
     OpenAIChatCompletionContentPartImageParam,
@@ -37,9 +39,6 @@ from llama_stack_api import (
     VectorIO,
 )
 
-from llama_stack.core.telemetry import tracing
-from llama_stack.log import get_logger
-
 from .types import ChatCompletionContext, ToolExecutionResult
 
 logger = get_logger(name=__name__, category="agents::meta_reference")
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
index 35ad03378..f6efcee22 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
@@ -7,6 +7,9 @@
 from dataclasses import dataclass
 from typing import cast
 
+from openai.types.chat import ChatCompletionToolParam
+from pydantic import BaseModel
+
 from llama_stack_api import (
     OpenAIChatCompletionToolCall,
     OpenAIMessageParam,
@@ -26,8 +29,6 @@ from llama_stack_api import (
     OpenAIResponseTool,
     OpenAIResponseToolMCP,
 )
-from openai.types.chat import ChatCompletionToolParam
-from pydantic import BaseModel
 
 
 class ToolExecutionResult(BaseModel):
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
index dd90ac298..bfb557a99 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/safety.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
@@ -6,10 +6,9 @@
 
 import asyncio
 
-from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
-
 from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
+from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
 
 log = get_logger(name=__name__, category="agents::meta_reference")
diff --git a/src/llama_stack/providers/inline/batches/reference/__init__.py b/src/llama_stack/providers/inline/batches/reference/__init__.py
index 27d0f4213..11c4b06a9 100644
--- a/src/llama_stack/providers/inline/batches/reference/__init__.py
+++ b/src/llama_stack/providers/inline/batches/reference/__init__.py
@@ -6,10 +6,9 @@
 
 from typing import Any
 
-from llama_stack_api import Files, Inference, Models
-
 from llama_stack.core.datatypes import AccessRule, Api
 from llama_stack.providers.utils.kvstore import kvstore_impl
+from llama_stack_api import Files, Inference, Models
 
 from .batches import ReferenceBatchesImpl
 from .config import ReferenceBatchesImplConfig
diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py
index f0f8da96c..73727799d 100644
--- a/src/llama_stack/providers/inline/batches/reference/batches.py
+++ b/src/llama_stack/providers/inline/batches/reference/batches.py
@@ -13,6 +13,11 @@ import uuid
 from io import BytesIO
 from typing import Any, Literal
 
+from openai.types.batch import BatchError, Errors
+from pydantic import BaseModel
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore import KVStore
 from llama_stack_api import (
     Batches,
     BatchObject,
@@ -33,11 +38,6 @@ from llama_stack_api import (
     OpenAIUserMessageParam,
     ResourceNotFoundError,
 )
-from openai.types.batch import BatchError, Errors
-from pydantic import BaseModel
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import KVStore
 
 from .config import ReferenceBatchesImplConfig
diff --git a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
index 1fcfbbef4..6ab1a540f 100644
--- a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
+++ b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
@@ -5,11 +5,10 @@
 # the root directory of this source tree.
 from typing import Any
 
-from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
-
 from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.pagination import paginate_records
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
 
 from .config import LocalFSDatasetIOConfig
diff --git a/src/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
index e6020e8a3..d43e569e2 100644
--- a/src/llama_stack/providers/inline/eval/meta_reference/eval.py
+++ b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
@@ -6,6 +6,10 @@
 import json
 from typing import Any
 
+from tqdm import tqdm
+
+from llama_stack.providers.utils.common.data_schema_validator import ColumnName
+from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack_api import (
     Agents,
     Benchmark,
@@ -24,10 +28,6 @@ from llama_stack_api import (
     OpenAIUserMessageParam,
     Scoring,
 )
-from tqdm import tqdm
-
-from llama_stack.providers.utils.common.data_schema_validator import ColumnName
-from llama_stack.providers.utils.kvstore import kvstore_impl
 
 from .config import MetaReferenceEvalConfig
diff --git a/src/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py
index 5e8c887f1..5fb35a378 100644
--- a/src/llama_stack/providers/inline/files/localfs/files.py
+++ b/src/llama_stack/providers/inline/files/localfs/files.py
@@ -10,6 +10,14 @@ from pathlib import Path
 from typing import Annotated
 
 from fastapi import Depends, File, Form, Response, UploadFile
+
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.id_generation import generate_object_id
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.files.form_data import parse_expires_after
+from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     ExpiresAfter,
     Files,
@@ -21,14 +29,6 @@ from llama_stack_api import (
     ResourceNotFoundError,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.id_generation import generate_object_id
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.files.form_data import parse_expires_after
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
-
 from .config import LocalfsFilesImplConfig
 
 logger = get_logger(name=__name__, category="files")
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py
index 802e79f15..ec6e8bfe8 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/config.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import QuantizationConfig
 from pydantic import BaseModel, field_validator
 
 from llama_stack.providers.utils.inference import supported_inference_models
+from llama_stack_api import QuantizationConfig
 
 
 class MetaReferenceInferenceConfig(BaseModel):
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
index 2155a1ae8..6781d0af9 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/generators.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
@@ -8,6 +8,14 @@ import math
 from typing import Optional
 
 import torch
+from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
+
+from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat
+from llama_stack.models.llama.llama3.generation import Llama3
+from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer
+from llama_stack.models.llama.llama4.generation import Llama4
+from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer
+from llama_stack.models.llama.sku_types import Model, ModelFamily
 from llama_stack_api import (
     GreedySamplingStrategy,
     JsonSchemaResponseFormat,
@@ -18,14 +26,6 @@ from llama_stack_api import (
     SamplingParams,
     TopPSamplingStrategy,
 )
-from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
-
-from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat
-from llama_stack.models.llama.llama3.generation import Llama3
-from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer
-from llama_stack.models.llama.llama4.generation import Llama4
-from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer
-from llama_stack.models.llama.sku_types import Model, ModelFamily
 
 from .common import model_checkpoint_dir
 from .config import MetaReferenceInferenceConfig
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
index 753185fe7..42d1299ab 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -9,23 +9,6 @@ import time
 import uuid
 from collections.abc import AsyncIterator
 
-from llama_stack_api import (
-    InferenceProvider,
-    Model,
-    ModelsProtocolPrivate,
-    ModelType,
-    OpenAIAssistantMessageParam,
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChatCompletionRequestWithExtraBody,
-    OpenAIChatCompletionUsage,
-    OpenAIChoice,
-    OpenAICompletion,
-    OpenAICompletionRequestWithExtraBody,
-    OpenAIUserMessageParam,
-    ToolChoice,
-)
-
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import RawMessage, RawTextItem, ToolDefinition
 from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat
@@ -48,6 +31,22 @@ from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
     build_hf_repo_model_entry,
 )
+from llama_stack_api import (
+    InferenceProvider,
+    Model,
+    ModelsProtocolPrivate,
+    ModelType,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIChatCompletionUsage,
+    OpenAIChoice,
+    OpenAICompletion,
+    OpenAICompletionRequestWithExtraBody,
+    OpenAIUserMessageParam,
+    ToolChoice,
+)
 
 from .config import MetaReferenceInferenceConfig
 from .generators import LlamaGenerator
@@ -441,6 +440,8 @@ class MetaReferenceInferenceImpl(
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> AsyncIterator[OpenAIChatCompletionChunk]:
         """Stream chat completion chunks as they're generated."""
+        from llama_stack.models.llama.datatypes import StopReason
+        from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message
         from llama_stack_api import (
             OpenAIChatCompletionChunk,
             OpenAIChatCompletionToolCall,
@@ -449,9 +450,6 @@ class MetaReferenceInferenceImpl(
             OpenAIChunkChoice,
         )
 
-        from llama_stack.models.llama.datatypes import StopReason
-        from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message
-
         response_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
         created = int(time.time())
         generated_text = ""
diff --git a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
index 14c9a41a4..b5cadeec2 100644
--- a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
+++ b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
@@ -6,6 +6,10 @@
 
 from collections.abc import AsyncIterator
 
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.embedding_mixin import (
+    SentenceTransformerEmbeddingMixin,
+)
 from llama_stack_api import (
     InferenceProvider,
     Model,
@@ -18,11 +22,6 @@ from llama_stack_api import (
     OpenAICompletionRequestWithExtraBody,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.embedding_mixin import (
-    SentenceTransformerEmbeddingMixin,
-)
-
 from .config import SentenceTransformersInferenceConfig
 
 log = get_logger(name=__name__, category="inference")
diff --git a/src/llama_stack/providers/inline/post_training/common/validator.py b/src/llama_stack/providers/inline/post_training/common/validator.py
index 7a85d0e03..cc018c865 100644
--- a/src/llama_stack/providers/inline/post_training/common/validator.py
+++ b/src/llama_stack/providers/inline/post_training/common/validator.py
@@ -12,11 +12,10 @@
 
 from typing import Any
 
-from llama_stack_api import ChatCompletionInputType, DialogType, StringType
-
 from llama_stack.providers.utils.common.data_schema_validator import (
     ColumnName,
 )
+from llama_stack_api import ChatCompletionInputType, DialogType, StringType
 
 EXPECTED_DATASET_SCHEMA: dict[str, list[dict[str, Any]]] = {
     "instruct": [
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
index f3f3d8d56..fa939d439 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
@@ -6,6 +6,11 @@
 from enum import Enum
 from typing import Any
 
+from llama_stack.providers.inline.post_training.huggingface.config import (
+    HuggingFacePostTrainingConfig,
+)
+from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
+from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
 from llama_stack_api import (
     AlgorithmConfig,
     Checkpoint,
@@ -20,12 +25,6 @@ from llama_stack_api import (
     TrainingConfig,
 )
 
-from llama_stack.providers.inline.post_training.huggingface.config import (
-    HuggingFacePostTrainingConfig,
-)
-from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
-from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
-
 
 class TrainingArtifactType(Enum):
     CHECKPOINT = "checkpoint"
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
index 58a30618c..c7c737fbd 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
@@ -12,14 +12,6 @@ from typing import Any
 
 import torch
 from datasets import Dataset
-from llama_stack_api import (
-    Checkpoint,
-    DataConfig,
-    DatasetIO,
-    Datasets,
-    LoraFinetuningConfig,
-    TrainingConfig,
-)
 from peft import LoraConfig
 from transformers import (
     AutoTokenizer,
@@ -28,6 +20,14 @@ from trl import SFTConfig, SFTTrainer
 
 from llama_stack.log import get_logger
 from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
+from llama_stack_api import (
+    Checkpoint,
+    DataConfig,
+    DatasetIO,
+    Datasets,
+    LoraFinetuningConfig,
+    TrainingConfig,
+)
 
 from ..config import HuggingFacePostTrainingConfig
 from ..utils import (
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
index f7dc3ebf2..da2626555 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
@@ -11,13 +11,6 @@ from typing import Any
 
 import torch
 from datasets import Dataset
-from llama_stack_api import (
-    Checkpoint,
-    DatasetIO,
-    Datasets,
-    DPOAlignmentConfig,
-    TrainingConfig,
-)
 from transformers import (
     AutoTokenizer,
 )
@@ -25,6 +18,13 @@ from trl import DPOConfig, DPOTrainer
 
 from llama_stack.log import get_logger
 from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
+from llama_stack_api import (
+    Checkpoint,
+    DatasetIO,
+    Datasets,
+    DPOAlignmentConfig,
+    TrainingConfig,
+)
 
 from ..config import HuggingFacePostTrainingConfig
 from ..utils import (
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/utils.py b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
index 86c3c3f52..2037f70e7 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/utils.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
@@ -14,9 +14,10 @@ from typing import TYPE_CHECKING, Any, Protocol
 import psutil
 import torch
 from datasets import Dataset
-from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig
 from transformers import AutoConfig, AutoModelForCausalLM
 
+from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig
+
 if TYPE_CHECKING:
     from transformers import PretrainedConfig
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
index 1483b8385..f929ea4dd 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
@@ -13,7 +13,6 @@ from collections.abc import Callable
 
 import torch
-from llama_stack_api import DatasetFormat
 from pydantic import BaseModel
 from torchtune.data._messages import InputOutputToMessages, ShareGPTToMessages
 from torchtune.models.llama3 import llama3_tokenizer
@@ -24,6 +23,7 @@ from torchtune.modules.transforms import Transform
 
 from llama_stack.models.llama.sku_list import resolve_model
 from llama_stack.models.llama.sku_types import Model
+from llama_stack_api import DatasetFormat
 
 BuildLoraModelCallable = Callable[..., torch.nn.Module]
 BuildTokenizerCallable = Callable[..., Llama3Tokenizer]
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
index 3370d42fa..515ff7b66 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
@@ -6,6 +6,11 @@
 from enum import Enum
 from typing import Any
 
+from llama_stack.providers.inline.post_training.torchtune.config import (
+    TorchtunePostTrainingConfig,
+)
+from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
+from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
 from llama_stack_api import (
     AlgorithmConfig,
     Checkpoint,
@@ -21,12 +26,6 @@ from llama_stack_api import (
     TrainingConfig,
 )
 
-from llama_stack.providers.inline.post_training.torchtune.config import (
-    TorchtunePostTrainingConfig,
-)
-from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
-from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
-
 
 class TrainingArtifactType(Enum):
     CHECKPOINT = "checkpoint"
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
index 2bf1d0fe7..f5e5db415 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
@@ -12,17 +12,6 @@ from pathlib import Path
 from typing import Any
 
 import torch
-from llama_stack_api import (
-    Checkpoint,
-    DataConfig,
-    DatasetIO,
-    Datasets,
-    LoraFinetuningConfig,
-    OptimizerConfig,
-    PostTrainingMetric,
-    QATFinetuningConfig,
-    TrainingConfig,
-)
 from torch import nn
 from torch.optim import Optimizer
 from torch.utils.data import DataLoader, DistributedSampler
@@ -56,6 +45,17 @@ from llama_stack.providers.inline.post_training.torchtune.config import (
     TorchtunePostTrainingConfig,
 )
 from llama_stack.providers.inline.post_training.torchtune.datasets.sft import SFTDataset
+from llama_stack_api import (
+    Checkpoint,
+    DataConfig,
+    DatasetIO,
+    Datasets,
+    LoraFinetuningConfig,
+    OptimizerConfig,
+    PostTrainingMetric,
+    QATFinetuningConfig,
+    TrainingConfig,
+)
 
 log = get_logger(name=__name__, category="post_training")
diff --git a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
index 80e907c10..071fbe2dc 100644
--- a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
+++ b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
@@ -10,6 +10,10 @@ from typing import TYPE_CHECKING, Any
 if TYPE_CHECKING:
     from codeshield.cs import CodeShieldScanResult
 
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack_api import (
     ModerationObject,
     ModerationObjectResults,
@@ -21,11 +25,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-
 from .config import CodeScannerConfig
 
 log = get_logger(name=__name__, category="safety")
diff --git a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
index 36e4280b9..ff1536bea 100644
--- a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
+++ b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
@@ -9,6 +9,13 @@ import uuid
 from string import Template
 from typing import Any
 
+from llama_stack.core.datatypes import Api
+from llama_stack.log import get_logger
+from llama_stack.models.llama.datatypes import Role
+from llama_stack.models.llama.sku_types import CoreModelId
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack_api import (
     ImageContentItem,
     Inference,
@@ -26,14 +33,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.core.datatypes import Api
-from llama_stack.log import get_logger
-from llama_stack.models.llama.datatypes import Role
-from llama_stack.models.llama.sku_types import CoreModelId
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-
 from .config import LlamaGuardConfig
 
 CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?"
diff --git a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
index b4f495f19..51383da1b 100644
--- a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
+++ b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
@@ -7,6 +7,11 @@
 from typing import Any
 
 import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+from llama_stack.core.utils.model_utils import model_local_dir
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack_api import (
     ModerationObject,
     OpenAIMessageParam,
@@ -18,11 +23,6 @@ from llama_stack_api import (
     ShieldStore,
     ViolationLevel,
 )
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
-
-from llama_stack.core.utils.model_utils import model_local_dir
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 
 from .config import PromptGuardConfig, PromptGuardType
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring.py b/src/llama_stack/providers/inline/scoring/basic/scoring.py
index 326fd9211..cf5cb79ba 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring.py
@@ -5,6 +5,11 @@
 # the root directory of this source tree.
 from typing import Any
 
+from llama_stack.core.datatypes import Api
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+)
 from llama_stack_api import (
     DatasetIO,
     Datasets,
@@ -17,12 +22,6 @@ from llama_stack_api import (
     ScoringResult,
 )
 
-from llama_stack.core.datatypes import Api
-from llama_stack.providers.utils.common.data_schema_validator import (
-    get_valid_schemas,
-    validate_dataset_schema,
-)
-
 from .config import BasicScoringConfig
 from .scoring_fn.docvqa_scoring_fn import DocVQAScoringFn
 from .scoring_fn.equality_scoring_fn import EqualityScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
index 93c2627dd..e48bab8fa 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
@@ -8,9 +8,8 @@ import json
 import re
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.docvqa import docvqa
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
index 382c64d88..2e79240be 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.equality import equality
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
index 4ec85bb09..33b1c5a31 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.ifeval import (
     ifeval,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
index 4e9d49e96..1f4f2f979 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
@@ -5,9 +5,8 @@
 # the root directory of this source tree.
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
 
 from ..utils.math_utils import first_answer, normalize_final_answer, try_evaluate_frac, try_evaluate_latex
 from .fn_defs.regex_parser_math_response import (
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
index 7f213b38c..1cc74f874 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
@@ -6,9 +6,8 @@
 import re
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
 
 from .fn_defs.regex_parser_multiple_choice_answer import (
     regex_parser_multiple_choice_answer,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
index b291924d5..fe15a4972 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.subset_of import subset_of
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
index cbab93c74..cfa35547b 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
@@ -17,6 +17,16 @@ from autoevals.ragas import (
     ContextRelevancy,
     Faithfulness,
 )
+from pydantic import BaseModel
+
+from llama_stack.core.datatypes import Api
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+    validate_row_schema,
+)
+from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
 from llama_stack_api import (
     DatasetIO,
     Datasets,
@@ -29,16 +39,6 @@ from llama_stack_api import (
     ScoringResult,
     ScoringResultRow,
 )
-from pydantic import BaseModel
-
-from llama_stack.core.datatypes import Api
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.utils.common.data_schema_validator import (
-    get_valid_schemas,
-    validate_dataset_schema,
-    validate_row_schema,
-)
-from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
 
 from .config import BraintrustScoringConfig
 from .scoring_fn.fn_defs.answer_correctness import answer_correctness_fn_def
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
index aa636d2b3..23e6ad705 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
@@ -5,6 +5,11 @@
 # the root directory of this source tree.
 from typing import Any
 
+from llama_stack.core.datatypes import Api
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+)
 from llama_stack_api import (
     DatasetIO,
     Datasets,
@@ -18,12 +23,6 @@ from llama_stack_api import (
     ScoringResult,
 )
 
-from llama_stack.core.datatypes import Api
-from llama_stack.providers.utils.common.data_schema_validator import (
-    get_valid_schemas,
-    validate_dataset_schema,
-)
-
 from .config import LlmAsJudgeScoringConfig
 from .scoring_fn.llm_as_judge_scoring_fn import LlmAsJudgeScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
index 169a4d8b7..73ce82cda 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
@@ -6,9 +6,8 @@
 import re
 from typing import Any
 
-from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow
 
 from .fn_defs.llm_as_judge_405b_simpleqa import llm_as_judge_405b_simpleqa
 from .fn_defs.llm_as_judge_base import llm_as_judge_base
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
index f499989cb..240df199b 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
@@ -6,6 +6,10 @@
 
 from jinja2 import Template
+
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack_api import (
     DefaultRAGQueryGeneratorConfig,
     InterleavedContent,
@@ -16,10 +20,6 @@ from llama_stack_api import (
     RAGQueryGeneratorConfig,
 )
 
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-
 
 async def generate_rag_query(
     config: RAGQueryGeneratorConfig,
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
index aacb7bb38..895d219bb 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -12,6 +12,11 @@ from typing import Any
 
 import httpx
 from fastapi import UploadFile
+from pydantic import TypeAdapter
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
+from llama_stack.providers.utils.memory.vector_store import parse_data_url
 from llama_stack_api import (
     URL,
     Files,
@@ -34,11 +39,6 @@ from llama_stack_api import (
     VectorStoreChunkingStrategyStatic,
     VectorStoreChunkingStrategyStaticConfig,
 )
-from pydantic import TypeAdapter
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-from llama_stack.providers.utils.memory.vector_store import parse_data_url
 
 from .config import RagToolRuntimeConfig
 from .context_retriever import generate_rag_query
diff --git a/src/llama_stack/providers/inline/vector_io/chroma/config.py b/src/llama_stack/providers/inline/vector_io/chroma/config.py
index d955b1d06..3897991f5 100644
--- a/src/llama_stack/providers/inline/vector_io/chroma/config.py
+++ b/src/llama_stack/providers/inline/vector_io/chroma/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/config.py b/src/llama_stack/providers/inline/vector_io/faiss/config.py
index dd433f818..d516d9fe9 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/config.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
index abef42499..d52a54e6a 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -12,6 +12,13 @@ from typing import Any
 
 import faiss  # type: ignore[import-untyped]
 import numpy as np
+from numpy.typing import NDArray
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore import kvstore_impl
+from llama_stack.providers.utils.kvstore.api import KVStore
+from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
+from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
 from llama_stack_api import (
     Chunk,
     Files,
@@ -25,13 +32,6 @@ from llama_stack_api import (
     VectorStoreNotFoundError,
     VectorStoresProtocolPrivate,
 )
-from numpy.typing import NDArray
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.api import KVStore
-from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
-from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
 
 from .config import FaissVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/milvus/config.py b/src/llama_stack/providers/inline/vector_io/milvus/config.py
index 08d05c991..14ddd2362 100644
--- a/src/llama_stack/providers/inline/vector_io/milvus/config.py
+++ b/src/llama_stack/providers/inline/vector_io/milvus/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/config.py b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
index 437d643f0..4251f2f39 100644
--- a/src/llama_stack/providers/inline/vector_io/qdrant/config.py
+++ b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
@@ -7,10 +7,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index e979ff323..74bc349a5 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -12,16 +12,6 @@ from typing import Any
 
 import numpy as np
 import sqlite_vec  # type: ignore[import-untyped]
-from llama_stack_api import (
-    Chunk,
-    Files,
-    Inference,
-    QueryChunksResponse,
-    VectorIO,
-    VectorStore,
-    VectorStoreNotFoundError,
-    VectorStoresProtocolPrivate,
-)
 from numpy.typing import NDArray
 
 from llama_stack.log import get_logger
@@ -35,6 +25,16 @@ from llama_stack.providers.utils.memory.vector_store import (
     VectorStoreWithIndex,
 )
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 
 logger = get_logger(name=__name__, category="vector_io")
diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py
index bd204cecd..455be1ae7 100644
--- a/src/llama_stack/providers/registry/agents.py
+++ b/src/llama_stack/providers/registry/agents.py
@@ -5,14 +5,13 @@
 # the root directory of this source tree.
 
 
+from llama_stack.providers.utils.kvstore import kvstore_dependencies
 from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
 )
 
-from llama_stack.providers.utils.kvstore import kvstore_dependencies
-
 
 def available_providers() -> list[ProviderSpec]:
     return [
diff --git a/src/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py
index dfc527816..024254b57 100644
--- a/src/llama_stack/providers/registry/files.py
+++ b/src/llama_stack/providers/registry/files.py
@@ -4,9 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
-
 from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
 
 
 def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/tool_runtime.py b/src/llama_stack/providers/registry/tool_runtime.py
index 3f0a83a30..d34312353 100644
--- a/src/llama_stack/providers/registry/tool_runtime.py
+++ b/src/llama_stack/providers/registry/tool_runtime.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 
+from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
 from llama_stack_api import (
     Api,
     InlineProviderSpec,
@@ -12,8 +13,6 @@ from llama_stack_api import (
     RemoteProviderSpec,
 )
 
-from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
-
 
 def available_providers() -> list[ProviderSpec]:
     return [
diff --git a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
index 1260ce644..72069f716 100644
--- a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
+++ b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
@@ -6,10 +6,9 @@
 from typing import Any
 from urllib.parse import parse_qs, urlparse
 
-from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
-
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.pagination import paginate_records
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
 
 from .config import HuggingfaceDatasetIOConfig
diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
index cb674b0d7..2f5548fa9 100644
--- a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
+++ b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
@@ -7,6 +7,7 @@
 from typing import Any
 
 import aiohttp
+
 from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType
 
 from .config import NvidiaDatasetIOConfig
diff --git a/src/llama_stack/providers/remote/eval/nvidia/eval.py b/src/llama_stack/providers/remote/eval/nvidia/eval.py
index fbdec0d4d..5802cb098 100644
--- a/src/llama_stack/providers/remote/eval/nvidia/eval.py
+++ b/src/llama_stack/providers/remote/eval/nvidia/eval.py
@@ -6,6 +6,8 @@
 from typing import Any
 
 import requests
+
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack_api import (
     Agents,
     Benchmark,
@@ -22,8 +24,6 @@ from llama_stack_api import (
     ScoringResult,
 )
 
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
-
 from .config import NVIDIAEvalConfig
 
 DEFAULT_NAMESPACE = "nvidia"
diff --git a/src/llama_stack/providers/remote/files/openai/files.py b/src/llama_stack/providers/remote/files/openai/files.py
index bbd630977..d2f5a08eb 100644
--- a/src/llama_stack/providers/remote/files/openai/files.py
+++ b/src/llama_stack/providers/remote/files/openai/files.py
@@ -8,6 +8,12 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 
 from fastapi import Depends, File, Form, Response, UploadFile
+
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.providers.utils.files.form_data import parse_expires_after
+from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     ExpiresAfter,
     Files,
@@ -18,12 +24,6 @@ from llama_stack_api import (
     Order,
     ResourceNotFoundError,
 )
-
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.providers.utils.files.form_data import parse_expires_after
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from openai import OpenAI
 
 from .config import OpenAIFilesImplConfig
diff --git a/src/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py
index 14f1e3852..68822eb77 100644
--- a/src/llama_stack/providers/remote/files/s3/files.py
+++ b/src/llama_stack/providers/remote/files/s3/files.py
@@ -17,6 +17,12 @@ from fastapi import Depends, File, Form, Response, UploadFile
 
 if TYPE_CHECKING:
     from mypy_boto3_s3.client import S3Client
 
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.id_generation import generate_object_id
+from llama_stack.providers.utils.files.form_data import parse_expires_after
+from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     ExpiresAfter,
     Files,
@@ -28,13 +34,6 @@ from llama_stack_api import (
     ResourceNotFoundError,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.id_generation import generate_object_id
-from llama_stack.providers.utils.files.form_data import parse_expires_after
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
-
 from .config import S3FilesImplConfig
 
 # TODO: provider data for S3 credentials
diff --git a/src/llama_stack/providers/remote/inference/anthropic/config.py b/src/llama_stack/providers/remote/inference/anthropic/config.py
index 7ee4c54e2..b706b90e1 100644
--- a/src/llama_stack/providers/remote/inference/anthropic/config.py
+++ b/src/llama_stack/providers/remote/inference/anthropic/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class AnthropicProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/azure/config.py b/src/llama_stack/providers/remote/inference/azure/config.py
index 596f6c234..b801b91b2 100644
--- a/src/llama_stack/providers/remote/inference/azure/config.py
+++ b/src/llama_stack/providers/remote/inference/azure/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, HttpUrl, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class AzureProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 1a9fe533b..70ee95916 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -6,6 +6,11 @@
 
 from collections.abc import AsyncIterator, Iterable
 
+from openai import AuthenticationError
+
+from llama_stack.core.telemetry.tracing import get_current_span
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
@@ -15,11 +20,6 @@ from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from openai import AuthenticationError
-
-from llama_stack.core.telemetry.tracing import get_current_span
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import BedrockConfig
diff --git a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
index c7f3111f9..680431e22 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -6,13 +6,12 @@
 
 from urllib.parse import urljoin
 
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-
 from .config import CerebrasImplConfig
diff --git a/src/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py
index a1fd41e2d..db357fd1c 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/config.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 DEFAULT_BASE_URL = "https://api.cerebras.ai"
diff --git a/src/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py
index 4974593d2..bd409fa13 100644
--- a/src/llama_stack/providers/remote/inference/databricks/config.py
+++ b/src/llama_stack/providers/remote/inference/databricks/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class DatabricksProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py
index 8b802379f..c07d97b67 100644
--- a/src/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/src/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -7,10 +7,10 @@
 from collections.abc import Iterable
 
 from databricks.sdk import WorkspaceClient
-from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 
 from .config import DatabricksImplConfig
diff --git a/src/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py
index d786655eb..e36c76054 100644
--- a/src/llama_stack/providers/remote/inference/fireworks/config.py
+++ b/src/llama_stack/providers/remote/inference/fireworks/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/gemini/config.py b/src/llama_stack/providers/remote/inference/gemini/config.py
index 6c25c005c..46cec7d0d 100644
--- a/src/llama_stack/providers/remote/inference/gemini/config.py
+++ b/src/llama_stack/providers/remote/inference/gemini/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class GeminiProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py
index 79d694f06..f6f48cc2b 100644
--- a/src/llama_stack/providers/remote/inference/gemini/gemini.py
+++ b/src/llama_stack/providers/remote/inference/gemini/gemini.py
@@ -6,6 +6,7 @@
 
 from typing import Any
 
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
@@ -13,8 +14,6 @@ from llama_stack_api import (
     OpenAIEmbeddingUsage,
 )
 
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-
 from .config import GeminiConfig
diff --git a/src/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py
index cec327716..cca53a4e8 100644
--- a/src/llama_stack/providers/remote/inference/groq/config.py
+++ b/src/llama_stack/providers/remote/inference/groq/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class GroqProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
index c16311830..ded210d89 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class LlamaProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index 1dea3e3cb..a5f67ecd1 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -4,6 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
@@ -11,10 +14,6 @@ from llama_stack_api import (
     OpenAIEmbeddingsResponse,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-
 logger = get_logger(name=__name__, category="inference::llama_openai_compat")
diff --git a/src/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py
index 6ff98d290..e5b0c6b73 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/config.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class NVIDIAProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
index 9e4c6f559..17f8775bf 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -8,6 +8,9 @@
 from collections.abc import Iterable
 
 import aiohttp
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     Model,
     ModelType,
@@ -17,9 +20,6 @@ from llama_stack_api import (
     RerankResponse,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-
 from . import NVIDIAConfig
 from .utils import _is_nvidia_hosted
diff --git a/src/llama_stack/providers/remote/inference/oci/config.py b/src/llama_stack/providers/remote/inference/oci/config.py
index 24b4ad926..93cc36d76 100644
--- a/src/llama_stack/providers/remote/inference/oci/config.py
+++ b/src/llama_stack/providers/remote/inference/oci/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class OCIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/oci/oci.py b/src/llama_stack/providers/remote/inference/oci/oci.py
index 36e56cf6c..239443963 100644
--- a/src/llama_stack/providers/remote/inference/oci/oci.py
+++ b/src/llama_stack/providers/remote/inference/oci/oci.py
@@ -10,11 +10,6 @@ from typing import Any
 
 import httpx
 import oci
-from llama_stack_api import (
-    ModelType,
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
 from oci.generative_ai.generative_ai_client import GenerativeAiClient
 from oci.generative_ai.models import ModelCollection
 from openai._base_client import DefaultAsyncHttpxClient
@@ -23,6 +18,11 @@ from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
 from llama_stack.providers.remote.inference.oci.config import OCIConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
+    ModelType,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 
 logger = get_logger(name=__name__, category="inference::oci")
diff --git a/src/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py
index 6a471429e..d1bf85361 100644
--- a/src/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/src/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -7,17 +7,17 @@
 
 import asyncio
 
+from ollama import AsyncClient as AsyncOllamaClient
+
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     HealthResponse,
     HealthStatus,
     Model,
     UnsupportedModelError,
 )
-from ollama import AsyncClient as AsyncOllamaClient
-
-from llama_stack.log import get_logger
-from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 logger = get_logger(name=__name__, category="inference::ollama")
diff --git a/src/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py
index cbb01b2d0..ab28e571f 100644
--- a/src/llama_stack/providers/remote/inference/openai/config.py
+++ b/src/llama_stack/providers/remote/inference/openai/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class OpenAIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py
index 7045dbf2e..54508b6fb 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/config.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 19cf0c5d7..75eedf026 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -6,6 +6,9 @@
 
 from collections.abc import AsyncIterator
 
+from openai import AsyncOpenAI
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack_api import (
     Inference,
     Model,
@@ -17,9 +20,6 @@ from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from openai import AsyncOpenAI
-
-from llama_stack.core.request_headers import NeedsRequestProviderData
 
 from .config import PassthroughImplConfig
diff --git a/src/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py
index aaa4230a8..2ee56ca94 100644
--- a/src/llama_stack/providers/remote/inference/runpod/config.py
+++ b/src/llama_stack/providers/remote/inference/runpod/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class RunpodProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py
index 4596b2df5..9c770cc24 100644
--- a/src/llama_stack/providers/remote/inference/runpod/runpod.py
+++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py
@@ -6,14 +6,13 @@
 
 from collections.abc import AsyncIterator
 
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
 )
 
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-
 from .config import RunpodImplConfig
diff --git a/src/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py
index 6d72e7205..93679ba99 100644
--- a/src/llama_stack/providers/remote/inference/sambanova/config.py
+++ b/src/llama_stack/providers/remote/inference/sambanova/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class SambaNovaProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py
index 051a2afa3..74edc8523 100644
--- a/src/llama_stack/providers/remote/inference/tgi/config.py
+++ b/src/llama_stack/providers/remote/inference/tgi/config.py
@@ -5,10 +5,10 @@
 # the root directory of this source tree.
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py
index 831a26e39..dd47ccc62 100644
--- a/src/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -8,14 +8,14 @@
 from collections.abc import Iterable
 
 from huggingface_hub import AsyncInferenceClient, HfApi
-from llama_stack_api import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
 from pydantic import SecretStr
 
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 
 from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
diff --git a/src/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py
index 96c0538e3..c1b3c4a55 100644
--- a/src/llama_stack/providers/remote/inference/together/config.py
+++ b/src/llama_stack/providers/remote/inference/together/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py
index f1355a760..cd34aec5e 100644
--- a/src/llama_stack/providers/remote/inference/together/together.py
+++ b/src/llama_stack/providers/remote/inference/together/together.py
@@ -8,18 +8,18 @@
 from collections.abc import Iterable
 from typing import Any, cast
 
-from llama_stack_api import (
-    Model,
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-    OpenAIEmbeddingUsage,
-)
 from together import AsyncTogether  # type: ignore[import-untyped]
 from together.constants import BASE_URL  # type: ignore[import-untyped]
 
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
+    Model,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+)
 
 from .config import TogetherImplConfig
diff --git a/src/llama_stack/providers/remote/inference/vertexai/config.py b/src/llama_stack/providers/remote/inference/vertexai/config.py
index 53e2b3e65..5891f7cd0 100644
--- a/src/llama_stack/providers/remote/inference/vertexai/config.py
+++ b/src/llama_stack/providers/remote/inference/vertexai/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class VertexAIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py
index 23f713961..c43533ee4 100644
--- a/src/llama_stack/providers/remote/inference/vllm/config.py
+++ b/src/llama_stack/providers/remote/inference/vllm/config.py
@@ -6,10 +6,10 @@
 
 from pathlib import Path
 
-from llama_stack_api import json_schema_type
 from pydantic import Field, SecretStr, field_validator
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py
index f7938c22c..1510e9384 100644
--- a/src/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -7,6 +7,10 @@ from collections.abc import AsyncIterator
 from urllib.parse import urljoin
 
 import httpx
+from pydantic import ConfigDict
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     HealthResponse,
     HealthStatus,
@@ -15,10 +19,6 @@ from llama_stack_api import (
     OpenAIChatCompletionRequestWithExtraBody,
     ToolChoice,
 )
-from pydantic import ConfigDict
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import VLLMInferenceAdapterConfig
diff --git a/src/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py
index 1bba040ef..914f80820 100644
--- a/src/llama_stack/providers/remote/inference/watsonx/config.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class WatsonXProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
index de23c25d7..aab9e2dca 100644
--- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -9,6 +9,12 @@ from typing import Any
 
 import litellm
 import requests
+
+from llama_stack.core.telemetry.tracing import get_current_span
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
+from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
 from llama_stack_api import (
     Model,
     ModelType,
@@ -22,12 +28,6 @@ from llama_stack_api import (
     OpenAIEmbeddingsResponse,
 )
 
-from llama_stack.core.telemetry.tracing import get_current_span
-from llama_stack.log import get_logger
-from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
-from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
-from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
-
 logger = get_logger(name=__name__, category="providers::remote::watsonx")
@@ -238,9 +238,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         )
 
         # Convert response to OpenAI format
-        from llama_stack_api import OpenAIEmbeddingUsage
-
         from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response
+        from llama_stack_api import OpenAIEmbeddingUsage
 
         data = b64_encode_openai_embeddings_response(response.data, params.encoding_format)
diff --git a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
index 02c35241b..830a9f747 100644
--- a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
+++ b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
@@ -8,6 +8,11 @@ from datetime import datetime
 from typing import Any, Literal
 
 import aiohttp
+from pydantic import BaseModel, ConfigDict
+
+from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig
+from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack_api import (
     AlgorithmConfig,
     DPOAlignmentConfig,
@@ -17,11 +22,6 @@ from llama_stack_api import (
     PostTrainingJobStatusResponse,
     TrainingConfig,
 )
-from pydantic import BaseModel, ConfigDict
-
-from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig
-from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 
 from .models import _MODEL_ENTRIES
diff --git a/src/llama_stack/providers/remote/post_training/nvidia/utils.py b/src/llama_stack/providers/remote/post_training/nvidia/utils.py
index 78762155d..bd40dacb4 100644
--- a/src/llama_stack/providers/remote/post_training/nvidia/utils.py
+++ b/src/llama_stack/providers/remote/post_training/nvidia/utils.py
@@ -7,11 +7,11 @@
 import warnings
 from typing import Any
 
-from llama_stack_api import TrainingConfig
 from pydantic import BaseModel
 
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig
+from llama_stack_api import TrainingConfig
 
 from .config import NvidiaPostTrainingConfig
diff --git a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py
index 86b93c32e..c321f759b 100644
--- a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py
+++ b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py
@@ -7,6 +7,8 @@
 import json
 from typing import Any
 
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.bedrock.client import create_bedrock_client
 from llama_stack_api import (
     OpenAIMessageParam,
     RunShieldResponse,
@@ -17,9 +19,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.bedrock.client import create_bedrock_client
-
 from .config import BedrockSafetyConfig
 
 logger = get_logger(name=__name__, category="safety::bedrock")
diff --git a/src/llama_stack/providers/remote/safety/bedrock/config.py b/src/llama_stack/providers/remote/safety/bedrock/config.py
index ca28924d4..0b1f2581a 100644
--- a/src/llama_stack/providers/remote/safety/bedrock/config.py
+++ b/src/llama_stack/providers/remote/safety/bedrock/config.py
@@ -5,9 +5,8 @@
 # the root directory of this source tree.
 
-from llama_stack_api import json_schema_type
-
 from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/safety/nvidia/config.py b/src/llama_stack/providers/remote/safety/nvidia/config.py
index fc686ae73..f11de5feb 100644
--- a/src/llama_stack/providers/remote/safety/nvidia/config.py
+++ b/src/llama_stack/providers/remote/safety/nvidia/config.py
@@ -6,9 +6,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
+from llama_stack_api import json_schema_type
+
 
 @json_schema_type
 class NVIDIASafetyConfig(BaseModel):
diff --git a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py
index b3b5090e0..43ff45cc9 100644
--- a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py
+++ b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py
@@ -7,6 +7,8 @@
 from typing import Any
 
 import requests
+
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ModerationObject,
     OpenAIMessageParam,
@@ -18,8 +20,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.log import get_logger
-
 from .config import NVIDIASafetyConfig
 
 logger = get_logger(name=__name__, category="safety::nvidia")
diff --git a/src/llama_stack/providers/remote/safety/sambanova/config.py b/src/llama_stack/providers/remote/safety/sambanova/config.py
index a8e745851..bfb42d88a 100644
--- a/src/llama_stack/providers/remote/safety/sambanova/config.py
+++ b/src/llama_stack/providers/remote/safety/sambanova/config.py
@@ -6,9 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
+from llama_stack_api import json_schema_type
+
 
 class SambaNovaProviderDataValidator(BaseModel):
    sambanova_api_key: str | None = Field(
diff --git a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py
index 119ebb6ed..c11cb544d 100644
--- a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py
+++ b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py
@@ -8,6 +8,9 @@ from typing import Any
 
 import litellm
 import requests
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
 from llama_stack_api import (
     OpenAIMessageParam,
     RunShieldResponse,
@@ -18,9 +21,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-
 from .config import SambaNovaSafetyConfig
 
 logger = get_logger(name=__name__, category="safety::sambanova")
diff --git a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
index 84e47dd4f..a5a53a9eb 100644
--- a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
+++ b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
@@ -8,6 +8,8 @@ import json
 from typing import Any
 
 import httpx
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
@@ -18,8 +20,6 @@ from llama_stack_api import (
     ToolRuntime,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-
 from .config import BingSearchToolConfig
diff --git a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
index b7eee776a..4888730e4 100644
--- a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
+++ b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
@@ -7,6 +7,9 @@
 from typing import Any
 
 import httpx
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.models.llama.datatypes import BuiltinTool
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
@@ -17,9 +20,6 @@ from llama_stack_api import (
     ToolRuntime,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.models.llama.datatypes import BuiltinTool
-
 from .config import BraveSearchToolConfig
diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
index efb1eb2df..544597a51 100644
--- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
+++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
@@ -7,6 +7,9 @@
 from typing import Any
 from urllib.parse import urlparse
 
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
 from llama_stack_api import (
     URL,
     Api,
@@ -17,10 +20,6 @@ from llama_stack_api import (
     ToolRuntime,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
-
 from .config import MCPProviderConfig
 
 logger = get_logger(__name__, category="tools")
diff --git a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
index d65d66e67..d86cf5d8e 100644
--- a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
+++ b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
@@ -8,6 +8,8 @@ import json
 from typing import Any
 
 import httpx
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
@@ -18,8 +20,6 @@ from llama_stack_api import (
     ToolRuntime,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-
 from .config import TavilySearchToolConfig
diff --git a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
index 9cc865092..f8d806a5c 100644
--- a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
+++ b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
@@ -8,6 +8,8 @@ import json
 from typing import Any
 
 import httpx
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
@@ -18,8 +20,6 @@ from llama_stack_api import (
     ToolRuntime,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-
 from .config import WolframAlphaToolConfig
diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py
index
eca5d349b..645b40661 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -9,6 +9,14 @@ from typing import Any from urllib.parse import urlparse import chromadb +from numpy.typing import NDArray + +from llama_stack.log import get_logger +from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig +from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.providers.utils.kvstore.api import KVStore +from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack_api import ( Chunk, Files, @@ -19,14 +27,6 @@ from llama_stack_api import ( VectorStore, VectorStoresProtocolPrivate, ) -from numpy.typing import NDArray - -from llama_stack.log import get_logger -from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/config.py b/src/llama_stack/providers/remote/vector_io/chroma/config.py index b1e4f9a4a..648d641ad 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/milvus/config.py b/src/llama_stack/providers/remote/vector_io/milvus/config.py index 2e2c788c7..4b9d6a566 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, ConfigDict, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index b856bf918..aefa20317 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -8,17 +8,6 @@ import asyncio import os from typing import Any -from llama_stack_api import ( - Chunk, - Files, - Inference, - InterleavedContent, - QueryChunksResponse, - VectorIO, - VectorStore, - VectorStoreNotFoundError, - VectorStoresProtocolPrivate, -) from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker @@ -34,6 +23,17 @@ from llama_stack.providers.utils.memory.vector_store import ( VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name +from llama_stack_api import ( 
+ Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/config.py b/src/llama_stack/providers/remote/vector_io/pgvector/config.py index aeb1c83bb..87d40a883 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 8aa0303b6..2901bad97 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -8,17 +8,6 @@ import heapq from typing import Any import psycopg2 -from llama_stack_api import ( - Chunk, - Files, - Inference, - InterleavedContent, - QueryChunksResponse, - VectorIO, - VectorStore, - VectorStoreNotFoundError, - VectorStoresProtocolPrivate, -) from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import Json, execute_values @@ -31,6 +20,17 @@ from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from .config import PGVectorVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/config.py b/src/llama_stack/providers/remote/vector_io/qdrant/config.py index 8cc4cbb2b..e0a3fe207 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 53d6be2b6..20ab653d0 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -9,6 +9,15 @@ import hashlib import uuid from typing import Any +from numpy.typing import NDArray +from qdrant_client import AsyncQdrantClient, models +from qdrant_client.models import PointStruct + +from llama_stack.log import get_logger +from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig +from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin +from llama_stack.providers.utils.memory.vector_store import 
ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack_api import ( Chunk, Files, @@ -22,15 +31,6 @@ from llama_stack_api import ( VectorStoreNotFoundError, VectorStoresProtocolPrivate, ) -from numpy.typing import NDArray -from qdrant_client import AsyncQdrantClient, models -from qdrant_client.models import PointStruct - -from llama_stack.log import get_logger -from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/config.py b/src/llama_stack/providers/remote/vector_io/weaviate/config.py index 19f9679fb..75d1b7c51 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index c72666f63..ba3e6b7ea 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -8,17 +8,6 @@ from typing import Any import weaviate import weaviate.classes as wvc -from llama_stack_api import ( - Chunk, - Files, - Inference, - InterleavedContent, - QueryChunksResponse, - VectorIO, - VectorStore, - VectorStoreNotFoundError, - VectorStoresProtocolPrivate, -) from numpy.typing import NDArray from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion @@ -35,6 +24,17 @@ from llama_stack.providers.utils.memory.vector_store import ( VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from .config import WeaviateVectorIOConfig diff --git a/src/llama_stack/providers/utils/common/data_schema_validator.py b/src/llama_stack/providers/utils/common/data_schema_validator.py index 7ef245779..c9a3b0920 100644 --- a/src/llama_stack/providers/utils/common/data_schema_validator.py +++ b/src/llama_stack/providers/utils/common/data_schema_validator.py @@ -7,9 +7,8 @@ from enum import Enum from typing import Any -from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType - from llama_stack.core.datatypes import Api +from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType class ColumnName(Enum): diff --git a/src/llama_stack/providers/utils/files/form_data.py b/src/llama_stack/providers/utils/files/form_data.py index 21afbec2b..3fac14f38 100644 --- a/src/llama_stack/providers/utils/files/form_data.py +++ b/src/llama_stack/providers/utils/files/form_data.py @@ -7,9 +7,10 @@ import json from fastapi import Request -from llama_stack_api import ExpiresAfter from pydantic 
import BaseModel, ValidationError +from llama_stack_api import ExpiresAfter + async def parse_pydantic_from_form[T: BaseModel](request: Request, field_name: str, model_class: type[T]) -> T | None: """ diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py index 3c707dd01..49e3af7a1 100644 --- a/src/llama_stack/providers/utils/inference/inference_store.py +++ b/src/llama_stack/providers/utils/inference/inference_store.py @@ -6,6 +6,11 @@ import asyncio from typing import Any +from sqlalchemy.exc import IntegrityError + +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType +from llama_stack.log import get_logger from llama_stack_api import ( ListOpenAIChatCompletionResponse, OpenAIChatCompletion, @@ -13,11 +18,6 @@ from llama_stack_api import ( OpenAIMessageParam, Order, ) -from sqlalchemy.exc import IntegrityError - -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType -from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py index 4f468725b..c462d1aad 100644 --- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -9,6 +9,13 @@ import struct from collections.abc import AsyncIterator import litellm + +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry +from llama_stack.providers.utils.inference.openai_compat import ( + prepare_openai_completion_params, +) from llama_stack_api import ( InferenceProvider, OpenAIChatCompletion, @@ -22,13 +29,6 @@ from llama_stack_api import ( OpenAIEmbeddingUsage, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry -from llama_stack.providers.utils.inference.openai_compat import ( - prepare_openai_completion_params, -) - logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/inference/model_registry.py b/src/llama_stack/providers/utils/inference/model_registry.py index e7ca5ab74..42b54497f 100644 --- a/src/llama_stack/providers/utils/inference/model_registry.py +++ b/src/llama_stack/providers/utils/inference/model_registry.py @@ -6,13 +6,13 @@ from typing import Any -from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError from pydantic import BaseModel, Field, SecretStr from llama_stack.log import get_logger from llama_stack.providers.utils.inference import ( ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR, ) +from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py index c97e42274..32d41ffde 100644 --- 
a/src/llama_stack/providers/utils/inference/openai_compat.py +++ b/src/llama_stack/providers/utils/inference/openai_compat.py @@ -20,18 +20,6 @@ except ImportError: from openai.types.chat.chat_completion_message_tool_call import ( ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall, ) -from llama_stack_api import ( - URL, - GreedySamplingStrategy, - ImageContentItem, - JsonSchemaResponseFormat, - OpenAIResponseFormatParam, - SamplingParams, - TextContentItem, - TopKSamplingStrategy, - TopPSamplingStrategy, - _URLOrData, -) from openai.types.chat import ( ChatCompletionMessageToolCall, ) @@ -44,6 +32,18 @@ from llama_stack.models.llama.datatypes import ( ToolCall, ToolDefinition, ) +from llama_stack_api import ( + URL, + GreedySamplingStrategy, + ImageContentItem, + JsonSchemaResponseFormat, + OpenAIResponseFormatParam, + SamplingParams, + TextContentItem, + TopKSamplingStrategy, + TopPSamplingStrategy, + _URLOrData, +) logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py index c05873df5..559ac90ce 100644 --- a/src/llama_stack/providers/utils/inference/openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/openai_mixin.py @@ -10,6 +10,14 @@ from abc import ABC, abstractmethod from collections.abc import AsyncIterator, Iterable from typing import Any +from openai import AsyncOpenAI +from pydantic import BaseModel, ConfigDict + +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params +from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content from llama_stack_api import ( Model, ModelType, @@ -24,14 +32,6 @@ from llama_stack_api import ( OpenAIEmbeddingUsage, OpenAIMessageParam, ) -from openai import AsyncOpenAI -from pydantic import BaseModel, ConfigDict - -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params -from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py index ea01a34e9..6272c9eed 100644 --- a/src/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/src/llama_stack/providers/utils/inference/prompt_adapter.py @@ -12,24 +12,6 @@ import re from typing import Any import httpx -from llama_stack_api import ( - CompletionRequest, - ImageContentItem, - InterleavedContent, - InterleavedContentItem, - OpenAIAssistantMessageParam, - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartTextParam, - OpenAIFile, - OpenAIMessageParam, - OpenAISystemMessageParam, - OpenAIToolMessageParam, - OpenAIUserMessageParam, - ResponseFormat, - ResponseFormatType, - TextContentItem, - ToolChoice, -) from PIL import Image as PIL_Image from llama_stack.log import get_logger @@ -48,6 +30,24 @@ from llama_stack.models.llama.llama3.chat_format import ChatFormat 
from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.models.llama.sku_list import resolve_model from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal +from llama_stack_api import ( + CompletionRequest, + ImageContentItem, + InterleavedContent, + InterleavedContentItem, + OpenAIAssistantMessageParam, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIFile, + OpenAIMessageParam, + OpenAISystemMessageParam, + OpenAIToolMessageParam, + OpenAIUserMessageParam, + ResponseFormat, + ResponseFormatType, + TextContentItem, + ToolChoice, +) log = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/kvstore/sqlite/config.py b/src/llama_stack/providers/utils/kvstore/sqlite/config.py index 895268a4f..0f8fa0a95 100644 --- a/src/llama_stack/providers/utils/kvstore/sqlite/config.py +++ b/src/llama_stack/providers/utils/kvstore/sqlite/config.py @@ -4,9 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field +from llama_stack_api import json_schema_type + @json_schema_type class SqliteControlPlaneConfig(BaseModel): diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 68d1c11e5..540ff5940 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -13,6 +13,16 @@ from abc import ABC, abstractmethod from typing import Annotated, Any from fastapi import Body +from pydantic import TypeAdapter + +from llama_stack.core.id_generation import generate_object_id +from llama_stack.log import get_logger +from llama_stack.providers.utils.kvstore.api import KVStore +from llama_stack.providers.utils.memory.vector_store import ( + ChunkForDeletion, + content_from_data_and_mime_type, + make_overlapped_chunks, +) from llama_stack_api import ( Chunk, Files, @@ -43,16 +53,6 @@ from llama_stack_api import ( VectorStoreSearchResponse, VectorStoreSearchResponsePage, ) -from pydantic import TypeAdapter - -from llama_stack.core.id_generation import generate_object_id -from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - content_from_data_and_mime_type, - make_overlapped_chunks, -) EMBEDDING_DIMENSION = 768 diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 37ac79039..b6a671ddb 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -14,6 +14,15 @@ from urllib.parse import unquote import httpx import numpy as np +from numpy.typing import NDArray +from pydantic import BaseModel + +from llama_stack.log import get_logger +from llama_stack.models.llama.llama3.tokenizer import Tokenizer +from llama_stack.providers.utils.inference.prompt_adapter import ( + interleaved_content_as_str, +) +from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id from llama_stack_api import ( URL, Api, @@ -25,15 +34,6 @@ from llama_stack_api import ( RAGDocument, VectorStore, ) -from numpy.typing import NDArray -from pydantic import BaseModel - -from 
llama_stack.log import get_logger -from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.providers.utils.inference.prompt_adapter import ( - interleaved_content_as_str, -) -from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id log = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index c7dfed15a..f6e7c435d 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -4,6 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference +from llama_stack.log import get_logger from llama_stack_api import ( ListOpenAIResponseInputItem, ListOpenAIResponseObject, @@ -15,10 +18,6 @@ from llama_stack_api import ( Order, ) -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference -from llama_stack.log import get_logger - from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore from ..sqlstore.sqlstore import sqlstore_impl diff --git a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py index d16c75263..f372db8b5 100644 --- a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py +++ b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py @@ -6,9 +6,8 @@ from abc import ABC, abstractmethod from typing import Any -from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow - from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics +from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow class BaseScoringFn(ABC): diff --git a/src/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py index 033a00edc..708fc7095 100644 --- a/src/llama_stack/providers/utils/sqlstore/api.py +++ b/src/llama_stack/providers/utils/sqlstore/api.py @@ -8,9 +8,10 @@ from collections.abc import Mapping, Sequence from enum import Enum from typing import Any, Literal, Protocol -from llama_stack_api import PaginatedResponse from pydantic import BaseModel +from llama_stack_api import PaginatedResponse + class ColumnType(Enum): INTEGER = "INTEGER" diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index 263f5e69f..10009d396 100644 --- a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -6,7 +6,6 @@ from collections.abc import Mapping, Sequence from typing import Any, Literal, cast -from llama_stack_api import PaginatedResponse from sqlalchemy import ( JSON, Boolean, @@ -29,6 +28,7 @@ from sqlalchemy.sql.elements import ColumnElement from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig from llama_stack.log import get_logger +from llama_stack_api import PaginatedResponse from .api import ColumnDefinition, ColumnType, SqlStore diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py index 
82c85f46c..fad1bf0f0 100644 --- a/src/llama_stack/providers/utils/tools/mcp.py +++ b/src/llama_stack/providers/utils/tools/mcp.py @@ -10,6 +10,14 @@ from enum import Enum from typing import Any, cast import httpx +from mcp import ClientSession, McpError +from mcp import types as mcp_types +from mcp.client.sse import sse_client +from mcp.client.streamable_http import streamablehttp_client + +from llama_stack.core.datatypes import AuthenticationRequiredError +from llama_stack.log import get_logger +from llama_stack.providers.utils.tools.ttl_dict import TTLDict from llama_stack_api import ( ImageContentItem, InterleavedContentItem, @@ -19,14 +27,6 @@ from llama_stack_api import ( ToolInvocationResult, _URLOrData, ) -from mcp import ClientSession, McpError -from mcp import types as mcp_types -from mcp.client.sse import sse_client -from mcp.client.streamable_http import streamablehttp_client - -from llama_stack.core.datatypes import AuthenticationRequiredError -from llama_stack.log import get_logger -from llama_stack.providers.utils.tools.ttl_dict import TTLDict logger = get_logger(__name__, category="tools") diff --git a/src/llama-stack-api/README.md b/src/llama_stack_api/README.md similarity index 100% rename from src/llama-stack-api/README.md rename to src/llama_stack_api/README.md diff --git a/src/llama-stack-api/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/__init__.py rename to src/llama_stack_api/__init__.py diff --git a/src/llama-stack-api/llama_stack_api/agents.py b/src/llama_stack_api/agents.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/agents.py rename to src/llama_stack_api/agents.py diff --git a/src/llama-stack-api/llama_stack_api/batches.py b/src/llama_stack_api/batches.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/batches.py rename to src/llama_stack_api/batches.py diff --git a/src/llama-stack-api/llama_stack_api/benchmarks.py b/src/llama_stack_api/benchmarks.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/benchmarks.py rename to src/llama_stack_api/benchmarks.py diff --git a/src/llama-stack-api/llama_stack_api/common/__init__.py b/src/llama_stack_api/common/__init__.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/__init__.py rename to src/llama_stack_api/common/__init__.py diff --git a/src/llama-stack-api/llama_stack_api/common/content_types.py b/src/llama_stack_api/common/content_types.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/content_types.py rename to src/llama_stack_api/common/content_types.py diff --git a/src/llama-stack-api/llama_stack_api/common/errors.py b/src/llama_stack_api/common/errors.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/errors.py rename to src/llama_stack_api/common/errors.py diff --git a/src/llama-stack-api/llama_stack_api/common/job_types.py b/src/llama_stack_api/common/job_types.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/job_types.py rename to src/llama_stack_api/common/job_types.py diff --git a/src/llama-stack-api/llama_stack_api/common/responses.py b/src/llama_stack_api/common/responses.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/responses.py rename to src/llama_stack_api/common/responses.py diff --git a/src/llama-stack-api/llama_stack_api/common/tracing.py b/src/llama_stack_api/common/tracing.py similarity index 
100% rename from src/llama-stack-api/llama_stack_api/common/tracing.py rename to src/llama_stack_api/common/tracing.py diff --git a/src/llama-stack-api/llama_stack_api/common/training_types.py b/src/llama_stack_api/common/training_types.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/training_types.py rename to src/llama_stack_api/common/training_types.py diff --git a/src/llama-stack-api/llama_stack_api/common/type_system.py b/src/llama_stack_api/common/type_system.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/type_system.py rename to src/llama_stack_api/common/type_system.py diff --git a/src/llama-stack-api/llama_stack_api/conversations.py b/src/llama_stack_api/conversations.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/conversations.py rename to src/llama_stack_api/conversations.py diff --git a/src/llama-stack-api/llama_stack_api/datasetio.py b/src/llama_stack_api/datasetio.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/datasetio.py rename to src/llama_stack_api/datasetio.py diff --git a/src/llama-stack-api/llama_stack_api/datasets.py b/src/llama_stack_api/datasets.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/datasets.py rename to src/llama_stack_api/datasets.py diff --git a/src/llama-stack-api/llama_stack_api/datatypes.py b/src/llama_stack_api/datatypes.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/datatypes.py rename to src/llama_stack_api/datatypes.py diff --git a/src/llama-stack-api/llama_stack_api/eval.py b/src/llama_stack_api/eval.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/eval.py rename to src/llama_stack_api/eval.py diff --git a/src/llama-stack-api/llama_stack_api/files.py b/src/llama_stack_api/files.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/files.py rename to src/llama_stack_api/files.py diff --git a/src/llama-stack-api/llama_stack_api/inference.py b/src/llama_stack_api/inference.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/inference.py rename to src/llama_stack_api/inference.py diff --git a/src/llama-stack-api/llama_stack_api/inspect.py b/src/llama_stack_api/inspect.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/inspect.py rename to src/llama_stack_api/inspect.py diff --git a/src/llama-stack-api/llama_stack_api/models.py b/src/llama_stack_api/models.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/models.py rename to src/llama_stack_api/models.py diff --git a/src/llama-stack-api/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/openai_responses.py rename to src/llama_stack_api/openai_responses.py diff --git a/src/llama-stack-api/llama_stack_api/post_training.py b/src/llama_stack_api/post_training.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/post_training.py rename to src/llama_stack_api/post_training.py diff --git a/src/llama-stack-api/llama_stack_api/prompts.py b/src/llama_stack_api/prompts.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/prompts.py rename to src/llama_stack_api/prompts.py diff --git a/src/llama-stack-api/llama_stack_api/providers.py b/src/llama_stack_api/providers.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/providers.py rename to src/llama_stack_api/providers.py diff --git 
a/src/llama-stack-api/llama_stack_api/py.typed b/src/llama_stack_api/py.typed similarity index 100% rename from src/llama-stack-api/llama_stack_api/py.typed rename to src/llama_stack_api/py.typed diff --git a/src/llama-stack-api/pyproject.toml b/src/llama_stack_api/pyproject.toml similarity index 100% rename from src/llama-stack-api/pyproject.toml rename to src/llama_stack_api/pyproject.toml diff --git a/src/llama-stack-api/llama_stack_api/rag_tool.py b/src/llama_stack_api/rag_tool.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/rag_tool.py rename to src/llama_stack_api/rag_tool.py diff --git a/src/llama-stack-api/llama_stack_api/resource.py b/src/llama_stack_api/resource.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/resource.py rename to src/llama_stack_api/resource.py diff --git a/src/llama-stack-api/llama_stack_api/safety.py b/src/llama_stack_api/safety.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/safety.py rename to src/llama_stack_api/safety.py diff --git a/src/llama-stack-api/llama_stack_api/schema_utils.py b/src/llama_stack_api/schema_utils.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/schema_utils.py rename to src/llama_stack_api/schema_utils.py diff --git a/src/llama-stack-api/llama_stack_api/scoring.py b/src/llama_stack_api/scoring.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/scoring.py rename to src/llama_stack_api/scoring.py diff --git a/src/llama-stack-api/llama_stack_api/scoring_functions.py b/src/llama_stack_api/scoring_functions.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/scoring_functions.py rename to src/llama_stack_api/scoring_functions.py diff --git a/src/llama-stack-api/llama_stack_api/shields.py b/src/llama_stack_api/shields.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/shields.py rename to src/llama_stack_api/shields.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/__init__.py b/src/llama_stack_api/strong_typing/__init__.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/__init__.py rename to src/llama_stack_api/strong_typing/__init__.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py b/src/llama_stack_api/strong_typing/auxiliary.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py rename to src/llama_stack_api/strong_typing/auxiliary.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/classdef.py b/src/llama_stack_api/strong_typing/classdef.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/classdef.py rename to src/llama_stack_api/strong_typing/classdef.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/core.py b/src/llama_stack_api/strong_typing/core.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/core.py rename to src/llama_stack_api/strong_typing/core.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py b/src/llama_stack_api/strong_typing/deserializer.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py rename to src/llama_stack_api/strong_typing/deserializer.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/docstring.py b/src/llama_stack_api/strong_typing/docstring.py similarity index 100% rename from 
src/llama-stack-api/llama_stack_api/strong_typing/docstring.py rename to src/llama_stack_api/strong_typing/docstring.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/exception.py b/src/llama_stack_api/strong_typing/exception.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/exception.py rename to src/llama_stack_api/strong_typing/exception.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/inspection.py b/src/llama_stack_api/strong_typing/inspection.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/inspection.py rename to src/llama_stack_api/strong_typing/inspection.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/mapping.py b/src/llama_stack_api/strong_typing/mapping.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/mapping.py rename to src/llama_stack_api/strong_typing/mapping.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/name.py b/src/llama_stack_api/strong_typing/name.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/name.py rename to src/llama_stack_api/strong_typing/name.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/py.typed b/src/llama_stack_api/strong_typing/py.typed similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/py.typed rename to src/llama_stack_api/strong_typing/py.typed diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/schema.py b/src/llama_stack_api/strong_typing/schema.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/schema.py rename to src/llama_stack_api/strong_typing/schema.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/serialization.py b/src/llama_stack_api/strong_typing/serialization.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/serialization.py rename to src/llama_stack_api/strong_typing/serialization.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/serializer.py b/src/llama_stack_api/strong_typing/serializer.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/serializer.py rename to src/llama_stack_api/strong_typing/serializer.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/slots.py b/src/llama_stack_api/strong_typing/slots.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/slots.py rename to src/llama_stack_api/strong_typing/slots.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/topological.py b/src/llama_stack_api/strong_typing/topological.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/topological.py rename to src/llama_stack_api/strong_typing/topological.py diff --git a/src/llama-stack-api/llama_stack_api/tools.py b/src/llama_stack_api/tools.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/tools.py rename to src/llama_stack_api/tools.py diff --git a/src/llama-stack-api/llama_stack_api/vector_io.py b/src/llama_stack_api/vector_io.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/vector_io.py rename to src/llama_stack_api/vector_io.py diff --git a/src/llama-stack-api/llama_stack_api/vector_stores.py b/src/llama_stack_api/vector_stores.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/vector_stores.py rename to src/llama_stack_api/vector_stores.py diff --git 
a/src/llama-stack-api/llama_stack_api/version.py b/src/llama_stack_api/version.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/version.py rename to src/llama_stack_api/version.py diff --git a/tests/integration/batches/conftest.py b/tests/integration/batches/conftest.py index b9c0ac916..4dc5b7993 100644 --- a/tests/integration/batches/conftest.py +++ b/tests/integration/batches/conftest.py @@ -13,6 +13,7 @@ from contextlib import contextmanager from io import BytesIO import pytest + from llama_stack_api import OpenAIFilePurpose diff --git a/tests/integration/files/test_files.py b/tests/integration/files/test_files.py index 61878ac4c..1f19c88c5 100644 --- a/tests/integration/files/test_files.py +++ b/tests/integration/files/test_files.py @@ -9,9 +9,9 @@ from unittest.mock import patch import pytest import requests -from llama_stack_api import OpenAIFilePurpose from llama_stack.core.datatypes import User +from llama_stack_api import OpenAIFilePurpose purpose = OpenAIFilePurpose.ASSISTANTS diff --git a/tests/integration/inference/test_provider_data_routing.py b/tests/integration/inference/test_provider_data_routing.py index d007b57d6..e4a0a24b5 100644 --- a/tests/integration/inference/test_provider_data_routing.py +++ b/tests/integration/inference/test_provider_data_routing.py @@ -15,6 +15,9 @@ that enables routing based on provider_data alone. from unittest.mock import AsyncMock, patch import pytest + +from llama_stack.core.library_client import LlamaStackAsLibraryClient +from llama_stack.core.telemetry.telemetry import MetricEvent from llama_stack_api import ( Api, OpenAIAssistantMessageParam, @@ -23,9 +26,6 @@ from llama_stack_api import ( OpenAIChoice, ) -from llama_stack.core.library_client import LlamaStackAsLibraryClient -from llama_stack.core.telemetry.telemetry import MetricEvent - class OpenAIChatCompletionWithMetrics(OpenAIChatCompletion): metrics: list[MetricEvent] | None = None diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py index ff6925b58..e6868019a 100644 --- a/tests/integration/post_training/test_post_training.py +++ b/tests/integration/post_training/test_post_training.py @@ -9,6 +9,8 @@ import time import uuid import pytest + +from llama_stack.log import get_logger from llama_stack_api import ( DataConfig, DatasetFormat, @@ -18,8 +20,6 @@ from llama_stack_api import ( TrainingConfig, ) -from llama_stack.log import get_logger - # Configure logging logger = get_logger(name=__name__, category="post_training") diff --git a/tests/integration/safety/test_llama_guard.py b/tests/integration/safety/test_llama_guard.py index 99b4982f0..a554752cd 100644 --- a/tests/integration/safety/test_llama_guard.py +++ b/tests/integration/safety/test_llama_guard.py @@ -12,9 +12,9 @@ import warnings from collections.abc import Generator import pytest -from llama_stack_api import ViolationLevel from llama_stack.models.llama.sku_types import CoreModelId +from llama_stack_api import ViolationLevel # Llama Guard models available for text and vision shields LLAMA_GUARD_TEXT_MODELS = [CoreModelId.llama_guard_4_12b.value] diff --git a/tests/integration/safety/test_safety.py b/tests/integration/safety/test_safety.py index 6a926f1d5..857ff2f81 100644 --- a/tests/integration/safety/test_safety.py +++ b/tests/integration/safety/test_safety.py @@ -7,6 +7,7 @@ import base64 import mimetypes import pytest + from llama_stack_api import ViolationLevel CODE_SCANNER_ENABLED_PROVIDERS = {"ollama", "together", 
"fireworks"} diff --git a/tests/integration/safety/test_vision_safety.py b/tests/integration/safety/test_vision_safety.py index b85a23263..dc7b7e1ad 100644 --- a/tests/integration/safety/test_vision_safety.py +++ b/tests/integration/safety/test_vision_safety.py @@ -9,6 +9,7 @@ import mimetypes import os import pytest + from llama_stack_api import ViolationLevel VISION_SHIELD_ENABLED_PROVIDERS = {"together"} diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py index 1b1b6ef28..036a5f018 100644 --- a/tests/integration/tool_runtime/test_registration.py +++ b/tests/integration/tool_runtime/test_registration.py @@ -7,9 +7,9 @@ import re import pytest -from llama_stack_api import ToolGroupNotFoundError from llama_stack.core.library_client import LlamaStackAsLibraryClient +from llama_stack_api import ToolGroupNotFoundError from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index c65dfecac..102f3f00c 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -8,12 +8,12 @@ import time from io import BytesIO import pytest -from llama_stack_api import Chunk, ExpiresAfter from llama_stack_client import BadRequestError from openai import BadRequestError as OpenAIBadRequestError from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.log import get_logger +from llama_stack_api import Chunk, ExpiresAfter from ..conftest import vector_provider_wrapper diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py index acaa44bcb..29dbd3e56 100644 --- a/tests/integration/vector_io/test_vector_io.py +++ b/tests/integration/vector_io/test_vector_io.py @@ -5,6 +5,7 @@ # the root directory of this source tree. 
 import pytest
+
 from llama_stack_api import Chunk

 from ..conftest import vector_provider_wrapper
diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py
index 2f942eb9c..95c54d379 100644
--- a/tests/unit/conversations/test_conversations.py
+++ b/tests/unit/conversations/test_conversations.py
@@ -8,7 +8,6 @@ import tempfile
 from pathlib import Path

 import pytest
-from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage
 from openai.types.conversations.conversation import Conversation as OpenAIConversation
 from openai.types.conversations.conversation_item import ConversationItem as OpenAIConversationItem
 from pydantic import TypeAdapter
@@ -25,6 +24,7 @@ from llama_stack.core.storage.datatypes import (
     StorageConfig,
 )
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage


 @pytest.fixture
diff --git a/tests/unit/core/routers/test_safety_router.py b/tests/unit/core/routers/test_safety_router.py
index 7e465513e..1b24a59a2 100644
--- a/tests/unit/core/routers/test_safety_router.py
+++ b/tests/unit/core/routers/test_safety_router.py
@@ -6,10 +6,9 @@

 from unittest.mock import AsyncMock

-from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield
-
 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.core.routers.safety import SafetyRouter
+from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield


 async def test_run_moderation_uses_default_shield_when_model_missing():
diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py
index 071fbe6e7..202e2da1b 100644
--- a/tests/unit/core/routers/test_vector_io.py
+++ b/tests/unit/core/routers/test_vector_io.py
@@ -7,9 +7,9 @@
 from unittest.mock import AsyncMock, Mock

 import pytest
-from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody

 from llama_stack.core.routers.vector_io import VectorIORouter
+from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody


 async def test_single_provider_auto_selection():
diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py
index acb31e1c9..462a25c8b 100644
--- a/tests/unit/core/test_stack_validation.py
+++ b/tests/unit/core/test_stack_validation.py
@@ -9,10 +9,10 @@
 from unittest.mock import AsyncMock

 import pytest
-from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield

 from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, StorageConfig, VectorStoresConfig
 from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config
+from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield


 class TestVectorStoresValidation:
diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py
index 2405d536e..8fd9d6ec3 100644
--- a/tests/unit/distribution/routers/test_routing_tables.py
+++ b/tests/unit/distribution/routers/test_routing_tables.py
@@ -9,6 +9,14 @@
 from unittest.mock import AsyncMock

 import pytest
+
+from llama_stack.core.datatypes import RegistryEntrySource
+from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable
+from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable
+from llama_stack.core.routing_tables.models import ModelsRoutingTable
+from llama_stack.core.routing_tables.scoring_functions import ScoringFunctionsRoutingTable
+from llama_stack.core.routing_tables.shields import ShieldsRoutingTable
+from llama_stack.core.routing_tables.toolgroups import ToolGroupsRoutingTable
 from llama_stack_api import (
     URL,
     Api,
@@ -25,14 +33,6 @@ from llama_stack_api import (
     URIDataSource,
 )

-from llama_stack.core.datatypes import RegistryEntrySource
-from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable
-from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable
-from llama_stack.core.routing_tables.models import ModelsRoutingTable
-from llama_stack.core.routing_tables.scoring_functions import ScoringFunctionsRoutingTable
-from llama_stack.core.routing_tables.shields import ShieldsRoutingTable
-from llama_stack.core.routing_tables.toolgroups import ToolGroupsRoutingTable
-

 class Impl:
     def __init__(self, api: Api):
diff --git a/tests/unit/distribution/test_api_recordings.py b/tests/unit/distribution/test_api_recordings.py
index f66b57df8..889f063e6 100644
--- a/tests/unit/distribution/test_api_recordings.py
+++ b/tests/unit/distribution/test_api_recordings.py
@@ -9,6 +9,14 @@ from pathlib import Path
 from unittest.mock import patch

 import pytest
+from openai import AsyncOpenAI
+
+from llama_stack.testing.api_recorder import (
+    APIRecordingMode,
+    ResponseStorage,
+    api_recording,
+    normalize_inference_request,
+)

 # Import the real Pydantic response types instead of using Mocks
 from llama_stack_api import (
@@ -19,14 +27,6 @@ from llama_stack_api import (
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
-from openai import AsyncOpenAI
-
-from llama_stack.testing.api_recorder import (
-    APIRecordingMode,
-    ResponseStorage,
-    api_recording,
-    normalize_inference_request,
-)


 @pytest.fixture
diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py
index a27455e24..b8d6ba55d 100644
--- a/tests/unit/distribution/test_distribution.py
+++ b/tests/unit/distribution/test_distribution.py
@@ -9,7 +9,6 @@ from unittest.mock import patch

 import pytest
 import yaml
-from llama_stack_api import ProviderSpec
 from pydantic import BaseModel, Field, ValidationError

 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
@@ -23,6 +22,7 @@ from llama_stack.core.storage.datatypes import (
     SqlStoreReference,
     StorageConfig,
 )
+from llama_stack_api import ProviderSpec


 class SampleConfig(BaseModel):
@@ -395,9 +395,8 @@ pip_packages:

     def test_external_provider_from_module_building(self, mock_providers):
         """Test loading an external provider from a module during build (building=True, partial spec)."""
-        from llama_stack_api import Api
-
         from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
+        from llama_stack_api import Api

         # No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec
         build_config = BuildConfig(
@@ -457,9 +456,8 @@ class TestGetExternalProvidersFromModule:
         """Test provider with module containing version spec (e.g., package==1.0.0)."""
         from types import SimpleNamespace

-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec

         fake_spec = ProviderSpec(
             api=Api.inference,
@@ -595,9 +593,8 @@ class TestGetExternalProvidersFromModule:
         """Test when get_provider_spec returns a list of specs."""
         from types import SimpleNamespace

-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec

         spec1 = ProviderSpec(
             api=Api.inference,
@@ -644,9 +641,8 @@ class TestGetExternalProvidersFromModule:
         """Test that list return filters specs by provider_type."""
         from types import SimpleNamespace

-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec

         spec1 = ProviderSpec(
             api=Api.inference,
@@ -693,9 +689,8 @@ class TestGetExternalProvidersFromModule:
         """Test that list return adds multiple different provider_types when config requests them."""
         from types import SimpleNamespace

-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec

         # Module returns both inline and remote variants
         spec1 = ProviderSpec(
@@ -833,9 +828,8 @@ class TestGetExternalProvidersFromModule:
         """Test multiple APIs with providers."""
         from types import SimpleNamespace

-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec

         inference_spec = ProviderSpec(
             api=Api.inference,
diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py
index 080d1ddbe..793f4edd3 100644
--- a/tests/unit/files/test_files.py
+++ b/tests/unit/files/test_files.py
@@ -6,7 +6,6 @@

 import pytest
-from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError

 from llama_stack.core.access_control.access_control import default_policy
 from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference
 from llama_stack.providers.inline.files.localfs import (
@@ -15,6 +14,7 @@ from llama_stack.providers.inline.files.localfs import (
     LocalfsFilesImplConfig,
 )
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError


 class MockUploadFile:
diff --git a/tests/unit/providers/batches/test_reference.py b/tests/unit/providers/batches/test_reference.py
index 3c93a578d..32d59234d 100644
--- a/tests/unit/providers/batches/test_reference.py
+++ b/tests/unit/providers/batches/test_reference.py
@@ -58,6 +58,7 @@ import json
 from unittest.mock import AsyncMock, MagicMock

 import pytest
+
 from llama_stack_api import BatchObject, ConflictError, ResourceNotFoundError


diff --git a/tests/unit/providers/batches/test_reference_idempotency.py b/tests/unit/providers/batches/test_reference_idempotency.py
index 4cd5d962d..acb7ca01c 100644
--- a/tests/unit/providers/batches/test_reference_idempotency.py
+++ b/tests/unit/providers/batches/test_reference_idempotency.py
@@ -43,6 +43,7 @@ Key Behaviors Tested:
 import asyncio

 import pytest
+
 from llama_stack_api import ConflictError


diff --git a/tests/unit/providers/files/test_s3_files.py b/tests/unit/providers/files/test_s3_files.py
index ae63c1a78..de6c92e9c 100644
--- a/tests/unit/providers/files/test_s3_files.py
+++ b/tests/unit/providers/files/test_s3_files.py
@@ -8,6 +8,7 @@ from unittest.mock import patch

 import pytest
 from botocore.exceptions import ClientError
+
 from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError


diff --git a/tests/unit/providers/files/test_s3_files_auth.py b/tests/unit/providers/files/test_s3_files_auth.py
index 873db4e27..e113611bd 100644
--- a/tests/unit/providers/files/test_s3_files_auth.py
+++ b/tests/unit/providers/files/test_s3_files_auth.py
@@ -7,10 +7,10 @@
 from unittest.mock import patch

 import pytest
-from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError

 from llama_stack.core.datatypes import User
 from llama_stack.providers.remote.files.s3.files import S3FilesImpl
+from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError


 async def test_listing_hides_other_users_file(s3_provider, sample_text_file):
diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py
index b3eecc558..a20f2860a 100644
--- a/tests/unit/providers/inference/test_bedrock_adapter.py
+++ b/tests/unit/providers/inference/test_bedrock_adapter.py
@@ -8,11 +8,11 @@ from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock

 import pytest
-from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
 from openai import AuthenticationError

 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody


 def test_adapter_initialization():
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index e2a5455b7..958895cc4 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -9,6 +9,11 @@ import time
 from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch

 import pytest
+
+from llama_stack.core.routers.inference import InferenceRouter
+from llama_stack.core.routing_tables.models import ModelsRoutingTable
+from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
+from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter
 from llama_stack_api import (
     HealthStatus,
     Model,
@@ -22,11 +27,6 @@ from llama_stack_api import (
     ToolChoice,
 )

-from llama_stack.core.routers.inference import InferenceRouter
-from llama_stack.core.routing_tables.models import ModelsRoutingTable
-from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
-from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter
-
 # These are unit test for the remote vllm provider
 # implementation. This should only contain tests which are specific to
 # the implementation details of those classes. More general
diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
index 36d2b86a9..658132340 100644
--- a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
+++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
@@ -7,12 +7,12 @@
 from unittest.mock import AsyncMock

 import pytest
-from llama_stack_api import ToolDef

 from llama_stack.providers.inline.agents.meta_reference.responses.streaming import (
     convert_tooldef_to_chat_tool,
 )
 from llama_stack.providers.inline.agents.meta_reference.responses.types import ChatCompletionContext
+from llama_stack_api import ToolDef


 @pytest.fixture
diff --git a/tests/unit/providers/nvidia/test_datastore.py b/tests/unit/providers/nvidia/test_datastore.py
index 0d9f1cc35..36006cc39 100644
--- a/tests/unit/providers/nvidia/test_datastore.py
+++ b/tests/unit/providers/nvidia/test_datastore.py
@@ -8,10 +8,10 @@ import os
 from unittest.mock import patch

 import pytest
-from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource

 from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig
 from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter
+from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource


 @pytest.fixture
diff --git a/tests/unit/providers/nvidia/test_eval.py b/tests/unit/providers/nvidia/test_eval.py
index c41379801..783d664bf 100644
--- a/tests/unit/providers/nvidia/test_eval.py
+++ b/tests/unit/providers/nvidia/test_eval.py
@@ -8,6 +8,10 @@ import os
 from unittest.mock import MagicMock, patch

 import pytest
+
+from llama_stack.models.llama.sku_types import CoreModelId
+from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
+from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
 from llama_stack_api import (
     Benchmark,
     BenchmarkConfig,
@@ -20,10 +24,6 @@ from llama_stack_api import (
     TopPSamplingStrategy,
 )

-from llama_stack.models.llama.sku_types import CoreModelId
-from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
-from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
-
 MOCK_DATASET_ID = "default/test-dataset"
 MOCK_BENCHMARK_ID = "test-benchmark"

diff --git a/tests/unit/providers/nvidia/test_parameters.py b/tests/unit/providers/nvidia/test_parameters.py
index ba68a7abe..b714fc607 100644
--- a/tests/unit/providers/nvidia/test_parameters.py
+++ b/tests/unit/providers/nvidia/test_parameters.py
@@ -9,6 +9,12 @@ import warnings
 from unittest.mock import patch

 import pytest
+
+from llama_stack.core.library_client import convert_pydantic_to_json_value
+from llama_stack.providers.remote.post_training.nvidia.post_training import (
+    NvidiaPostTrainingAdapter,
+    NvidiaPostTrainingConfig,
+)
 from llama_stack_api import (
     DataConfig,
     DatasetFormat,
@@ -19,12 +25,6 @@ from llama_stack_api import (
     TrainingConfig,
 )

-from llama_stack.core.library_client import convert_pydantic_to_json_value
-from llama_stack.providers.remote.post_training.nvidia.post_training import (
-    NvidiaPostTrainingAdapter,
-    NvidiaPostTrainingConfig,
-)
-

 class TestNvidiaParameters:
     @pytest.fixture(autouse=True)
diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py
index 8b313abcd..ee62910b8 100644
--- a/tests/unit/providers/nvidia/test_rerank_inference.py
+++ b/tests/unit/providers/nvidia/test_rerank_inference.py
@@ -8,11 +8,11 @@ from unittest.mock import AsyncMock, MagicMock, patch

 import aiohttp
 import pytest
-from llama_stack_api import ModelType

 from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig
 from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import ModelType


 class MockResponse:
diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py
index ea6254841..07e04ddea 100644
--- a/tests/unit/providers/nvidia/test_safety.py
+++ b/tests/unit/providers/nvidia/test_safety.py
@@ -9,6 +9,9 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
+
+from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig
+from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter
 from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIUserMessageParam,
@@ -18,9 +21,6 @@ from llama_stack_api import (
     ViolationLevel,
 )

-from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig
-from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter
-

 class FakeNVIDIASafetyAdapter(NVIDIASafetyAdapter):
     """Test implementation that provides the required shield_store."""
diff --git a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
index 4d0ce695b..94948da41 100644
--- a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
+++ b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
@@ -9,15 +9,6 @@ import warnings
 from unittest.mock import patch

 import pytest
-from llama_stack_api import (
-    DataConfig,
-    DatasetFormat,
-    LoraFinetuningConfig,
-    OptimizerConfig,
-    OptimizerType,
-    QATFinetuningConfig,
-    TrainingConfig,
-)

 from llama_stack.core.library_client import convert_pydantic_to_json_value
 from llama_stack.providers.remote.post_training.nvidia.post_training import (
@@ -27,6 +18,15 @@ from llama_stack.providers.remote.post_training.nvidia.post_training import (
     NvidiaPostTrainingJob,
     NvidiaPostTrainingJobStatusResponse,
 )
+from llama_stack_api import (
+    DataConfig,
+    DatasetFormat,
+    LoraFinetuningConfig,
+    OptimizerConfig,
+    OptimizerType,
+    QATFinetuningConfig,
+    TrainingConfig,
+)


 @pytest.fixture
diff --git a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py
index df7453712..7126e1b69 100644
--- a/tests/unit/providers/test_bedrock.py
+++ b/tests/unit/providers/test_bedrock.py
@@ -7,10 +7,9 @@
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, PropertyMock, patch

-from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
-
 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody


 def test_can_create_adapter():
diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py
index b9b59bb79..5b13a75f4 100644
--- a/tests/unit/providers/utils/inference/test_openai_mixin.py
+++ b/tests/unit/providers/utils/inference/test_openai_mixin.py
@@ -10,12 +10,12 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch

 import pytest
-from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
 from pydantic import BaseModel, Field

 from llama_stack.core.request_headers import request_provider_data_context
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam


 class OpenAIMixinImpl(OpenAIMixin):
diff --git a/tests/unit/providers/utils/inference/test_prompt_adapter.py b/tests/unit/providers/utils/inference/test_prompt_adapter.py
index a7c9289d7..ab5736ac5 100644
--- a/tests/unit/providers/utils/inference/test_prompt_adapter.py
+++ b/tests/unit/providers/utils/inference/test_prompt_adapter.py
@@ -4,12 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam
-
 from llama_stack.models.llama.datatypes import RawTextItem
 from llama_stack.providers.utils.inference.prompt_adapter import (
     convert_openai_message_to_raw_message,
 )
+from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam


 class TestConvertOpenAIMessageToRawMessage:
diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py
index 00db5795a..f3241ba20 100644
--- a/tests/unit/providers/utils/memory/test_vector_store.py
+++ b/tests/unit/providers/utils/memory/test_vector_store.py
@@ -7,9 +7,9 @@
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
-from llama_stack_api import URL, RAGDocument, TextContentItem

 from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc
+from llama_stack_api import URL, RAGDocument, TextContentItem


 async def test_content_from_doc_with_url():
diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py
index 4a85cf8b8..1e3efafa1 100644
--- a/tests/unit/providers/utils/test_model_registry.py
+++ b/tests/unit/providers/utils/test_model_registry.py
@@ -34,9 +34,9 @@
 #

 import pytest
-from llama_stack_api import Model

 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
+from llama_stack_api import Model


 @pytest.fixture
diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py
index 216e9b8ea..6408e25ab 100644
--- a/tests/unit/providers/vector_io/conftest.py
+++ b/tests/unit/providers/vector_io/conftest.py
@@ -9,7 +9,6 @@ from unittest.mock import AsyncMock, MagicMock, patch

 import numpy as np
 import pytest
-from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore

 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
@@ -19,6 +18,7 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteV
 from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig
 from llama_stack.providers.remote.vector_io.pgvector.pgvector import PGVectorIndex, PGVectorVectorIOAdapter
 from llama_stack.providers.utils.kvstore import register_kvstore_backends
+from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore

 EMBEDDING_DIMENSION = 768
 COLLECTION_PREFIX = "test_collection"
diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py
index 0d5c1399f..075296cbb 100644
--- a/tests/unit/providers/vector_io/test_faiss.py
+++ b/tests/unit/providers/vector_io/test_faiss.py
@@ -9,13 +9,13 @@ from unittest.mock import MagicMock, patch

 import numpy as np
 import pytest
-from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore

 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import (
     FaissIndex,
     FaissVectorIOAdapter,
 )
+from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore

 # This test is a unit test for the FaissVectorIOAdapter class. This should only contain
 # tests which are specific to this class. More general (API-level) tests should be placed in
diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py
index 17a99ce1c..d1548cf37 100644
--- a/tests/unit/providers/vector_io/test_sqlite_vec.py
+++ b/tests/unit/providers/vector_io/test_sqlite_vec.py
@@ -8,13 +8,13 @@ import asyncio

 import numpy as np
 import pytest
-from llama_stack_api import Chunk, QueryChunksResponse

 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
     SQLiteVecIndex,
     SQLiteVecVectorIOAdapter,
     _create_sqlite_connection,
 )
+from llama_stack_api import Chunk, QueryChunksResponse

 # This test is a unit test for the SQLiteVecVectorIOAdapter class. This should only contain
 # tests which are specific to this class. More general (API-level) tests should be placed in
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index 7ba40eefb..3797abb2c 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -10,6 +10,8 @@ from unittest.mock import AsyncMock, patch

 import numpy as np
 import pytest
+
+from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX
 from llama_stack_api import (
     Chunk,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
@@ -21,8 +23,6 @@ from llama_stack_api import (
     VectorStoreNotFoundError,
 )

-from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX
-
 # This test is a unit test for the inline VectorIO providers. This should only contain
 # tests which are specific to this class. More general (API-level) tests should be placed in
 # tests/integration/vector_io/
@@ -255,10 +255,9 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):

 async def test_document_id_with_invalid_type_raises_error():
     """Ensure TypeError is raised when document_id is not a string."""
-    from llama_stack_api import Chunk
-
     # Integer document_id should raise TypeError
     from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+    from llama_stack_api import Chunk

     chunk = Chunk(content="test", chunk_id=generate_chunk_id("test", "test"), metadata={"document_id": 12345})
     with pytest.raises(TypeError) as exc_info:
diff --git a/tests/unit/providers/vector_io/test_vector_utils.py b/tests/unit/providers/vector_io/test_vector_utils.py
index 678b76fbd..7f6b4af79 100644
--- a/tests/unit/providers/vector_io/test_vector_utils.py
+++ b/tests/unit/providers/vector_io/test_vector_utils.py
@@ -4,9 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack_api import Chunk, ChunkMetadata
-
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+from llama_stack_api import Chunk, ChunkMetadata

 # This test is a unit test for the chunk_utils.py helpers. This should only contain
 # tests which are specific to this file. More general (API-level) tests should be placed in
diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py
index e3f5e46d7..7eb17b74b 100644
--- a/tests/unit/rag/test_rag_query.py
+++ b/tests/unit/rag/test_rag_query.py
@@ -7,9 +7,9 @@
 from unittest.mock import AsyncMock, MagicMock

 import pytest
-from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig

 from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
+from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig


 class TestRagQuery:
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index 23c12dcab..2562df8d6 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -12,7 +12,6 @@ from unittest.mock import AsyncMock, MagicMock

 import numpy as np
 import pytest
-from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument

 from llama_stack.providers.utils.memory.vector_store import (
     URL,
@@ -22,6 +21,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     make_overlapped_chunks,
 )
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument

 DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf"
 # Depending on the machine, this can get parsed a couple of ways
diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py
index 01f486ab2..1b5032782 100644
--- a/tests/unit/registry/test_registry.py
+++ b/tests/unit/registry/test_registry.py
@@ -6,7 +6,6 @@

 import pytest
-from llama_stack_api import Model, VectorStore

 from llama_stack.core.datatypes import VectorStoreWithOwner
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.core.store.registry import (
@@ -16,6 +15,7 @@ from llama_stack.core.store.registry import (
     DiskDistributionRegistry,
 )
 from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends
+from llama_stack_api import Model, VectorStore


 @pytest.fixture
@@ -303,9 +303,8 @@ async def test_double_registration_different_objects(disk_dist_registry):

 async def test_double_registration_with_cache(cached_disk_dist_registry):
     """Test double registration behavior with caching enabled."""
-    from llama_stack_api import ModelType
-
     from llama_stack.core.datatypes import ModelWithOwner
+    from llama_stack_api import ModelType

     model1 = ModelWithOwner(
         identifier="test_model",
diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py
index 2827f60b9..a09d2a30d 100644
--- a/tests/unit/registry/test_registry_acl.py
+++ b/tests/unit/registry/test_registry_acl.py
@@ -5,10 +5,9 @@
 # the root directory of this source tree.


-from llama_stack_api import ModelType
-
 from llama_stack.core.datatypes import ModelWithOwner, User
 from llama_stack.core.store.registry import CachedDiskDistributionRegistry
+from llama_stack_api import ModelType


 async def test_registry_cache_with_acl(cached_disk_dist_registry):
diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py
index 1df933d4d..23a9636d5 100644
--- a/tests/unit/server/test_access_control.py
+++ b/tests/unit/server/test_access_control.py
@@ -8,12 +8,12 @@ from unittest.mock import MagicMock, Mock, patch

 import pytest
 import yaml
-from llama_stack_api import Api, ModelType
 from pydantic import TypeAdapter, ValidationError

 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
 from llama_stack.core.datatypes import AccessRule, ModelWithOwner, User
 from llama_stack.core.routing_tables.models import ModelsRoutingTable
+from llama_stack_api import Api, ModelType


 class AsyncMock(MagicMock):
diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py
index 071178f96..8f8a61ea7 100644
--- a/tests/unit/server/test_resolver.py
+++ b/tests/unit/server/test_resolver.py
@@ -9,7 +9,6 @@ import sys
 from typing import Any, Protocol
 from unittest.mock import AsyncMock, MagicMock

-from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec
 from pydantic import BaseModel, Field

 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
@@ -27,6 +26,7 @@ from llama_stack.core.storage.datatypes import (
 )
 from llama_stack.providers.utils.kvstore import register_kvstore_backends
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec


 def add_protocol_methods(cls: type, protocol: type[Protocol]) -> None:
diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py
index fdaf9022b..d82743c80 100644
--- a/tests/unit/server/test_sse.py
+++ b/tests/unit/server/test_sse.py
@@ -9,9 +9,9 @@ import logging  # allow-direct-logging
 from unittest.mock import AsyncMock, MagicMock

 import pytest
-from llama_stack_api import PaginatedResponse

 from llama_stack.core.server.server import create_dynamic_typed_route, create_sse_event, sse_generator
+from llama_stack_api import PaginatedResponse


 @pytest.fixture
diff --git a/tests/unit/tools/test_tools_json_schema.py b/tests/unit/tools/test_tools_json_schema.py
index 79e0b6e28..623955984 100644
--- a/tests/unit/tools/test_tools_json_schema.py
+++ b/tests/unit/tools/test_tools_json_schema.py
@@ -9,10 +9,10 @@ Unit tests for JSON Schema-based tool definitions.
 Tests the new input_schema and output_schema fields.
 """

-from llama_stack_api import ToolDef
 from pydantic import ValidationError

 from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition
+from llama_stack_api import ToolDef


 class TestToolDefValidation:
diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py
index 4da20b125..bdcc529ce 100644
--- a/tests/unit/utils/inference/test_inference_store.py
+++ b/tests/unit/utils/inference/test_inference_store.py
@@ -7,6 +7,10 @@
 import time

 import pytest
+
+from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig
+from llama_stack.providers.utils.inference.inference_store import InferenceStore
+from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
 from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
@@ -15,10 +19,6 @@ from llama_stack_api import (
     Order,
 )

-from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig
-from llama_stack.providers.utils.inference.inference_store import InferenceStore
-from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
-

 @pytest.fixture(autouse=True)
 def setup_backends(tmp_path):
diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py
index 1119a93d8..8c108d9c1 100644
--- a/tests/unit/utils/responses/test_responses_store.py
+++ b/tests/unit/utils/responses/test_responses_store.py
@@ -9,11 +9,11 @@ from tempfile import TemporaryDirectory
 from uuid import uuid4

 import pytest
-from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order

 from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig
 from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order


 def build_store(db_path: str, policy: list | None = None) -> ResponsesStore:
diff --git a/uv.lock b/uv.lock
index ddf8c1cd4..7e0575df6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2095,7 +2095,7 @@ requires-dist = [
     { name = "httpx" },
     { name = "jinja2", specifier = ">=3.1.6" },
     { name = "jsonschema" },
-    { name = "llama-stack-api", editable = "src/llama-stack-api" },
+    { name = "llama-stack-api", editable = "src/llama_stack_api" },
     { name = "llama-stack-client", marker = "extra == 'client'", specifier = ">=0.3.0" },
     { name = "openai", specifier = ">=2.5.0" },
    { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
@@ -2231,7 +2231,7 @@ unit = [
 [[package]]
 name = "llama-stack-api"
 version = "0.1.0"
-source = { editable = "src/llama-stack-api" }
+source = { editable = "src/llama_stack_api" }
 dependencies = [
     { name = "jsonschema" },
     { name = "opentelemetry-exporter-otlp-proto-http" },