From ac5fd57387f8fded5e6129789e2d09d01f6d67ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 26 Jun 2025 04:31:05 +0200 Subject: [PATCH] chore: remove nested imports (#2515) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? * Given that our API packages use "import *" in `__init__.py`, we don't need to import from `llama_stack.apis.models.models`; we can simply import from `llama_stack.apis.models` (see the short sketch appended after the diff). The decision to use `import *` is debatable and should probably be revisited at some point. * Remove the unneeded Ruff F401 rule * Consolidate the Ruff F403 rule in the pyproject Signed-off-by: Sébastien Han --- llama_stack/apis/agents/__init__.py | 2 +- llama_stack/apis/batch_inference/__init__.py | 2 +- llama_stack/apis/benchmarks/__init__.py | 2 +- llama_stack/apis/datasetio/__init__.py | 2 +- llama_stack/apis/datasets/__init__.py | 2 +- llama_stack/apis/eval/__init__.py | 2 +- llama_stack/apis/files/__init__.py | 2 +- llama_stack/apis/inference/__init__.py | 2 +- llama_stack/apis/inference/inference.py | 2 +- llama_stack/apis/inspect/__init__.py | 2 +- llama_stack/apis/models/__init__.py | 2 +- llama_stack/apis/post_training/__init__.py | 2 +- llama_stack/apis/providers/__init__.py | 2 +- llama_stack/apis/safety/__init__.py | 2 +- llama_stack/apis/scoring/__init__.py | 2 +- .../apis/scoring_functions/__init__.py | 2 +- llama_stack/apis/shields/__init__.py | 2 +- .../synthetic_data_generation/__init__.py | 2 +- llama_stack/apis/telemetry/__init__.py | 2 +- llama_stack/apis/tools/__init__.py | 4 ++-- llama_stack/apis/vector_dbs/__init__.py | 2 +- llama_stack/apis/vector_io/__init__.py | 2 +- llama_stack/distribution/routers/inference.py | 14 +++++------- llama_stack/distribution/routers/vector_io.py | 10 ++++----- llama_stack/distribution/ui/modules/api.py | 2 +- llama_stack/log.py | 2 +- .../agents/meta_reference/openai_responses.py | 4 ++-- .../inline/eval/meta_reference/eval.py | 2 +- .../inline/scoring/llm_as_judge/scoring.py | 2 +- .../scoring_fn/llm_as_judge_scoring_fn.py | 2 +- .../providers/inline/vector_io/faiss/faiss.py | 3 +-- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 6 ++--- .../remote/datasetio/nvidia/datasetio.py | 2 +- .../remote/inference/anthropic/models.py | 2 +- .../remote/inference/fireworks/fireworks.py | 14 +++++------- .../remote/inference/fireworks/models.py | 2 +- .../remote/inference/gemini/models.py | 2 +- .../providers/remote/inference/groq/groq.py | 2 +- .../remote/inference/nvidia/nvidia.py | 12 +++++----- .../remote/inference/ollama/models.py | 2 +- .../remote/inference/ollama/ollama.py | 16 ++++++-------- .../remote/inference/openai/models.py | 2 +- .../remote/inference/openai/openai.py | 2 +- .../inference/passthrough/passthrough.py | 12 +++++----- .../remote/inference/runpod/runpod.py | 2 +- .../remote/inference/together/models.py | 2 +- .../remote/inference/together/together.py | 12 +++++----- .../providers/remote/inference/vllm/vllm.py | 10 ++++----- .../remote/inference/watsonx/watsonx.py | 14 +++++------- .../utils/inference/litellm_openai_mixin.py | 18 +++++++-------- .../utils/inference/model_registry.py | 2 +- .../utils/inference/openai_compat.py | 22 +++++++++---------- .../utils/memory/openai_vector_store_mixin.py | 3 +-- .../providers/utils/telemetry/tracing.py | 2 +- llama_stack/templates/cerebras/cerebras.py | 2 +- llama_stack/templates/ci-tests/ci_tests.py | 2 +- llama_stack/templates/dell/dell.py | 2 +-
llama_stack/templates/fireworks/fireworks.py | 2 +- llama_stack/templates/groq/groq.py | 2 +- .../templates/hf-endpoint/hf_endpoint.py | 2 +- .../templates/hf-serverless/hf_serverless.py | 2 +- llama_stack/templates/llama_api/llama_api.py | 2 +- .../meta-reference-gpu/meta_reference.py | 2 +- llama_stack/templates/ollama/ollama.py | 2 +- .../open-benchmark/open_benchmark.py | 2 +- .../templates/passthrough/passthrough.py | 2 +- .../templates/postgres-demo/postgres_demo.py | 2 +- llama_stack/templates/remote-vllm/vllm.py | 2 +- llama_stack/templates/sambanova/sambanova.py | 2 +- llama_stack/templates/starter/starter.py | 2 +- llama_stack/templates/template.py | 2 +- llama_stack/templates/tgi/tgi.py | 2 +- llama_stack/templates/together/together.py | 2 +- llama_stack/templates/vllm-gpu/vllm.py | 2 +- llama_stack/templates/watsonx/watsonx.py | 2 +- pyproject.toml | 3 +++ .../routers/test_routing_tables.py | 2 +- .../meta_reference/fixtures/__init__.py | 2 +- .../meta_reference/test_openai_responses.py | 2 +- tests/unit/providers/nvidia/test_safety.py | 2 +- .../utils/inference/test_openai_compat.py | 2 +- .../providers/utils/test_model_registry.py | 2 +- 82 files changed, 143 insertions(+), 164 deletions(-) diff --git a/llama_stack/apis/agents/__init__.py b/llama_stack/apis/agents/__init__.py index ab203b6cd..6416b283b 100644 --- a/llama_stack/apis/agents/__init__.py +++ b/llama_stack/apis/agents/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .agents import * # noqa: F401 F403 +from .agents import * diff --git a/llama_stack/apis/batch_inference/__init__.py b/llama_stack/apis/batch_inference/__init__.py index 3249475ee..b9b2944b2 100644 --- a/llama_stack/apis/batch_inference/__init__.py +++ b/llama_stack/apis/batch_inference/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .batch_inference import * # noqa: F401 F403 +from .batch_inference import * diff --git a/llama_stack/apis/benchmarks/__init__.py b/llama_stack/apis/benchmarks/__init__.py index f8f564957..62d1b367c 100644 --- a/llama_stack/apis/benchmarks/__init__.py +++ b/llama_stack/apis/benchmarks/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .benchmarks import * # noqa: F401 F403 +from .benchmarks import * diff --git a/llama_stack/apis/datasetio/__init__.py b/llama_stack/apis/datasetio/__init__.py index 378afbba8..8c087bfa4 100644 --- a/llama_stack/apis/datasetio/__init__.py +++ b/llama_stack/apis/datasetio/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .datasetio import * # noqa: F401 F403 +from .datasetio import * diff --git a/llama_stack/apis/datasets/__init__.py b/llama_stack/apis/datasets/__init__.py index 102b9927f..9c9a128d2 100644 --- a/llama_stack/apis/datasets/__init__.py +++ b/llama_stack/apis/datasets/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .datasets import * # noqa: F401 F403 +from .datasets import * diff --git a/llama_stack/apis/eval/__init__.py b/llama_stack/apis/eval/__init__.py index 5f91ad70d..28a1d6049 100644 --- a/llama_stack/apis/eval/__init__.py +++ b/llama_stack/apis/eval/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .eval import * # noqa: F401 F403 +from .eval import * diff --git a/llama_stack/apis/files/__init__.py b/llama_stack/apis/files/__init__.py index 269baf177..189e4de19 100644 --- a/llama_stack/apis/files/__init__.py +++ b/llama_stack/apis/files/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .files import * # noqa: F401 F403 +from .files import * diff --git a/llama_stack/apis/inference/__init__.py b/llama_stack/apis/inference/__init__.py index f9f77f769..f0c8783c1 100644 --- a/llama_stack/apis/inference/__init__.py +++ b/llama_stack/apis/inference/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .inference import * # noqa: F401 F403 +from .inference import * diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index c64a5f750..222099064 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -20,7 +20,7 @@ from typing_extensions import TypedDict from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem from llama_stack.apis.common.responses import Order from llama_stack.apis.models import Model -from llama_stack.apis.telemetry.telemetry import MetricResponseMixin +from llama_stack.apis.telemetry import MetricResponseMixin from llama_stack.models.llama.datatypes import ( BuiltinTool, StopReason, diff --git a/llama_stack/apis/inspect/__init__.py b/llama_stack/apis/inspect/__init__.py index 88ba8e908..016937e3d 100644 --- a/llama_stack/apis/inspect/__init__.py +++ b/llama_stack/apis/inspect/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .inspect import * # noqa: F401 F403 +from .inspect import * diff --git a/llama_stack/apis/models/__init__.py b/llama_stack/apis/models/__init__.py index 410d8d1f9..ee90106b6 100644 --- a/llama_stack/apis/models/__init__.py +++ b/llama_stack/apis/models/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .models import * # noqa: F401 F403 +from .models import * diff --git a/llama_stack/apis/post_training/__init__.py b/llama_stack/apis/post_training/__init__.py index 7129c4abd..695575a30 100644 --- a/llama_stack/apis/post_training/__init__.py +++ b/llama_stack/apis/post_training/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .post_training import * # noqa: F401 F403 +from .post_training import * diff --git a/llama_stack/apis/providers/__init__.py b/llama_stack/apis/providers/__init__.py index b554a5d23..e35e2fe47 100644 --- a/llama_stack/apis/providers/__init__.py +++ b/llama_stack/apis/providers/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .providers import * # noqa: F401 F403 +from .providers import * diff --git a/llama_stack/apis/safety/__init__.py b/llama_stack/apis/safety/__init__.py index dc3fe90b4..d93bc1355 100644 --- a/llama_stack/apis/safety/__init__.py +++ b/llama_stack/apis/safety/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .safety import * # noqa: F401 F403 +from .safety import * diff --git a/llama_stack/apis/scoring/__init__.py b/llama_stack/apis/scoring/__init__.py index 0739dfc80..624b9e704 100644 --- a/llama_stack/apis/scoring/__init__.py +++ b/llama_stack/apis/scoring/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .scoring import * # noqa: F401 F403 +from .scoring import * diff --git a/llama_stack/apis/scoring_functions/__init__.py b/llama_stack/apis/scoring_functions/__init__.py index b96acb45f..fc1de0311 100644 --- a/llama_stack/apis/scoring_functions/__init__.py +++ b/llama_stack/apis/scoring_functions/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .scoring_functions import * # noqa: F401 F403 +from .scoring_functions import * diff --git a/llama_stack/apis/shields/__init__.py b/llama_stack/apis/shields/__init__.py index edad26100..783a4d124 100644 --- a/llama_stack/apis/shields/__init__.py +++ b/llama_stack/apis/shields/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .shields import * # noqa: F401 F403 +from .shields import * diff --git a/llama_stack/apis/synthetic_data_generation/__init__.py b/llama_stack/apis/synthetic_data_generation/__init__.py index cfdec76ce..bc169e8e6 100644 --- a/llama_stack/apis/synthetic_data_generation/__init__.py +++ b/llama_stack/apis/synthetic_data_generation/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .synthetic_data_generation import * # noqa: F401 F403 +from .synthetic_data_generation import * diff --git a/llama_stack/apis/telemetry/__init__.py b/llama_stack/apis/telemetry/__init__.py index 6a111dc9e..1250767f7 100644 --- a/llama_stack/apis/telemetry/__init__.py +++ b/llama_stack/apis/telemetry/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .telemetry import * # noqa: F401 F403 +from .telemetry import * diff --git a/llama_stack/apis/tools/__init__.py b/llama_stack/apis/tools/__init__.py index be8846ba2..b25310ecf 100644 --- a/llama_stack/apis/tools/__init__.py +++ b/llama_stack/apis/tools/__init__.py @@ -4,5 +4,5 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .rag_tool import * # noqa: F401 F403 -from .tools import * # noqa: F401 F403 +from .rag_tool import * +from .tools import * diff --git a/llama_stack/apis/vector_dbs/__init__.py b/llama_stack/apis/vector_dbs/__init__.py index 158241a6d..af34ba9d4 100644 --- a/llama_stack/apis/vector_dbs/__init__.py +++ b/llama_stack/apis/vector_dbs/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .vector_dbs import * # noqa: F401 F403 +from .vector_dbs import * diff --git a/llama_stack/apis/vector_io/__init__.py b/llama_stack/apis/vector_io/__init__.py index 3fe4fa4b6..3f4c60805 100644 --- a/llama_stack/apis/vector_io/__init__.py +++ b/llama_stack/apis/vector_io/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .vector_io import * # noqa: F401 F403 +from .vector_io import * diff --git a/llama_stack/distribution/routers/inference.py b/llama_stack/distribution/routers/inference.py index 50c429315..b39da7810 100644 --- a/llama_stack/distribution/routers/inference.py +++ b/llama_stack/distribution/routers/inference.py @@ -30,7 +30,13 @@ from llama_stack.apis.inference import ( ListOpenAIChatCompletionResponse, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAICompletionWithInputMessages, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, Order, ResponseFormat, SamplingParams, @@ -41,14 +47,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.log import get_logger diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py index 6af3bd416..4bd5952dc 100644 --- a/llama_stack/distribution/routers/vector_io.py +++ b/llama_stack/distribution/routers/vector_io.py @@ -16,17 +16,15 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, SearchRankingOptions, VectorIO, - VectorStoreDeleteResponse, - VectorStoreListResponse, - VectorStoreObject, - VectorStoreSearchResponsePage, -) -from llama_stack.apis.vector_io.vector_io import ( VectorStoreChunkingStrategy, + VectorStoreDeleteResponse, VectorStoreFileContentsResponse, VectorStoreFileDeleteResponse, VectorStoreFileObject, VectorStoreFileStatus, + VectorStoreListResponse, + VectorStoreObject, + VectorStoreSearchResponsePage, ) from llama_stack.log import get_logger from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py index 11455ed46..9db87b280 100644 --- a/llama_stack/distribution/ui/modules/api.py +++ b/llama_stack/distribution/ui/modules/api.py @@ -25,7 +25,7 @@ class LlamaStackApi: def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None): """Run scoring on a single row""" if not scoring_params: - scoring_params = {fn_id: None for fn_id in scoring_function_ids} + scoring_params = dict.fromkeys(scoring_function_ids) return self.client.scoring.score(input_rows=[row], 
scoring_functions=scoring_params) diff --git a/llama_stack/log.py b/llama_stack/log.py index c14967f0a..fcbb79a5d 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -33,7 +33,7 @@ CATEGORIES = [ ] # Initialize category levels with default level -_category_levels: dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES} +_category_levels: dict[str, int] = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL) def config_to_category_levels(category: str, level: str): diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py index cf3293ed0..f291593f4 100644 --- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -45,7 +45,7 @@ from llama_stack.apis.agents.openai_responses import ( WebSearchToolTypes, ) from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( Inference, OpenAIAssistantMessageParam, OpenAIChatCompletion, @@ -584,7 +584,7 @@ class OpenAIResponsesImpl: from llama_stack.apis.agents.openai_responses import ( MCPListToolsTool, ) - from llama_stack.apis.tools.tools import Tool + from llama_stack.apis.tools import Tool mcp_tool_to_server = {} diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index bc0898dc5..9ae2018c4 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -208,7 +208,7 @@ class MetaReferenceEvalImpl( for scoring_fn_id in scoring_functions } else: - scoring_functions_dict = {scoring_fn_id: None for scoring_fn_id in scoring_functions} + scoring_functions_dict = dict.fromkeys(scoring_functions) score_response = await self.scoring_api.score( input_rows=score_input_rows, scoring_functions=scoring_functions_dict diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py index b705cb9b3..2bd113a94 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py @@ -7,7 +7,7 @@ from typing import Any from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.inference import Inference from llama_stack.apis.scoring import ( ScoreBatchResponse, ScoreResponse, diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py index 51cdf6c3f..340215a53 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py @@ -6,7 +6,7 @@ import re from typing import Any -from llama_stack.apis.inference.inference import Inference, UserMessage +from llama_stack.apis.inference import Inference, UserMessage from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py 
b/llama_stack/providers/inline/vector_io/faiss/faiss.py index 12f4d6ad0..355750b25 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -16,8 +16,7 @@ import numpy as np from numpy.typing import NDArray from llama_stack.apis.files import Files -from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.inference import Inference, InterleavedContent from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 3b3c5f486..7e977635a 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -15,8 +15,8 @@ import numpy as np import sqlite_vec from numpy.typing import NDArray -from llama_stack.apis.files.files import Files -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.files import Files +from llama_stack.apis.inference import Inference from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -64,7 +64,7 @@ def _normalize_scores(scores: dict[str, float]) -> dict[str, float]: score_range = max_score - min_score if score_range > 0: return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()} - return {doc_id: 1.0 for doc_id in scores} + return dict.fromkeys(scores, 1.0) def _weighted_rerank( diff --git a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py index 1f22a935b..f723c92cc 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py +++ b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py @@ -66,7 +66,7 @@ class NvidiaDatasetIOAdapter: Returns: Dataset """ - ## add warnings for unsupported params + # add warnings for unsupported params request_body = { "name": dataset_def.identifier, "namespace": self.config.dataset_namespace, diff --git a/llama_stack/providers/remote/inference/anthropic/models.py b/llama_stack/providers/remote/inference/anthropic/models.py index 39cb64440..afaf3c4e4 100644 --- a/llama_stack/providers/remote/inference/anthropic/models.py +++ b/llama_stack/providers/remote/inference/anthropic/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index 79b1b5f08..1c82ff3a8 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -24,6 +24,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, ResponseFormatType, SamplingParams, @@ -33,14 +39,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ( diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index 027eeab8d..392aed72f 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/gemini/models.py b/llama_stack/providers/remote/inference/gemini/models.py index ef1cf339f..c4bb4f08b 100644 --- a/llama_stack/providers/remote/inference/gemini/models.py +++ b/llama_stack/providers/remote/inference/gemini/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index 27d7d7961..4b295e788 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -9,7 +9,7 @@ from typing import Any from openai import AsyncOpenAI -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChoiceDelta, diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index cb6c6e279..1dd72da3f 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -29,20 +29,18 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, ToolChoice, ToolConfig, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat from llama_stack.providers.utils.inference import ( diff --git a/llama_stack/providers/remote/inference/ollama/models.py b/llama_stack/providers/remote/inference/ollama/models.py index 8f0f0421a..cacf88861 100644 --- a/llama_stack/providers/remote/inference/ollama/models.py +++ b/llama_stack/providers/remote/inference/ollama/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 2f51920b5..e9df0dcc8 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -32,15 +32,6 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, - ResponseFormat, - SamplingParams, - TextTruncation, - ToolChoice, - ToolConfig, - ToolDefinition, - ToolPromptFormat, -) -from llama_stack.apis.inference.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAICompletion, @@ -48,6 +39,13 @@ from llama_stack.apis.inference.inference import ( OpenAIEmbeddingUsage, OpenAIMessageParam, OpenAIResponseFormatParam, + ResponseFormat, + SamplingParams, + TextTruncation, + ToolChoice, + ToolConfig, + ToolDefinition, + ToolPromptFormat, ) from llama_stack.apis.models import Model, ModelType from llama_stack.log import get_logger diff --git a/llama_stack/providers/remote/inference/openai/models.py b/llama_stack/providers/remote/inference/openai/models.py index e029c456c..14a6955d5 100644 --- a/llama_stack/providers/remote/inference/openai/models.py +++ b/llama_stack/providers/remote/inference/openai/models.py @@ -6,7 +6,7 @@ from dataclasses import dataclass -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index ed4ec22aa..72428422f 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -10,7 +10,7 @@ from typing import Any from openai import AsyncOpenAI -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAICompletion, diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py index e9660abb9..d5b3a5973 100644 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -19,7 +19,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -28,13 +33,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py index 
f8c98893e..1863b8a50 100644 --- a/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/llama_stack/providers/remote/inference/runpod/runpod.py @@ -8,7 +8,7 @@ from collections.abc import AsyncGenerator from openai import OpenAI from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.apis.inference.inference import OpenAIEmbeddingsResponse +from llama_stack.apis.inference import OpenAIEmbeddingsResponse # from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py index f4b259767..85e1b1848 100644 --- a/llama_stack/providers/remote/inference/together/models.py +++ b/llama_stack/providers/remote/inference/together/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index 7030a644d..9e6877b7c 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -23,7 +23,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, ResponseFormatType, SamplingParams, @@ -33,13 +38,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index ae04f206a..d1455acaa 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -38,9 +38,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAICompletion, OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -49,12 +53,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall from llama_stack.models.llama.sku_list import all_registered_models diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py index 7cdd06a1f..78161d1cb 100644 --- 
a/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -18,10 +18,16 @@ from llama_stack.apis.inference import ( CompletionRequest, EmbeddingsResponse, EmbeddingTaskType, + GreedySamplingStrategy, Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -29,14 +35,6 @@ from llama_stack.apis.inference import ( ToolConfig, ToolDefinition, ToolPromptFormat, -) -from llama_stack.apis.inference.inference import ( - GreedySamplingStrategy, - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, TopKSamplingStrategy, TopPSamplingStrategy, ) diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index c21f379c9..d19908368 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -23,6 +23,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -31,16 +38,7 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIEmbeddingUsage, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) -from llama_stack.apis.models.models import Model +from llama_stack.apis.models import Model from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index d707e36c2..de67e5288 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -8,7 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference import ( diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index 01dfb8d61..47144ee0e 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -95,27 +95,25 @@ from llama_stack.apis.inference import ( CompletionResponse, CompletionResponseStreamChunk, GreedySamplingStrategy, - Message, - SamplingParams, - SystemMessage, - TokenLogProbs, - ToolChoice, - ToolResponseMessage, - TopKSamplingStrategy, - TopPSamplingStrategy, - UserMessage, -) -from llama_stack.apis.inference.inference import ( JsonSchemaResponseFormat, + Message, OpenAIChatCompletion, OpenAICompletion, OpenAICompletionChoice, OpenAIEmbeddingData, 
OpenAIMessageParam, OpenAIResponseFormatParam, + SamplingParams, + SystemMessage, + TokenLogProbs, + ToolChoice, ToolConfig, + ToolResponseMessage, + TopKSamplingStrategy, + TopPSamplingStrategy, + UserMessage, ) -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChoice as OpenAIChatCompletionChoice, ) from llama_stack.models.llama.datatypes import ( diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 8b962db76..d00624aed 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -12,8 +12,7 @@ import uuid from abc import ABC, abstractmethod from typing import Any -from llama_stack.apis.files import Files -from llama_stack.apis.files.files import OpenAIFileObject +from llama_stack.apis.files import Files, OpenAIFileObject from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 10081f037..4ae68ee1d 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -180,7 +180,7 @@ async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceCont trace_id = generate_trace_id() context = TraceContext(BACKGROUND_LOGGER, trace_id) - attributes = {marker: True for marker in ROOT_SPAN_MARKERS} | (attributes or {}) + attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | (attributes or {}) context.push_span(name, attributes) CURRENT_TRACE_CONTEXT.set(context) diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index d891502d8..f341a88c1 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py index afa8a23ce..7de8069ae 100644 --- a/llama_stack/templates/ci-tests/ci_tests.py +++ b/llama_stack/templates/ci-tests/ci_tests.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py index a7ec5f3b8..5a6f52a89 100644 --- a/llama_stack/templates/dell/dell.py +++ b/llama_stack/templates/dell/dell.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index 5e8935361..ad29c648f 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py index 4e52aa42d..9e166a288 100644 --- a/llama_stack/templates/groq/groq.py +++ b/llama_stack/templates/groq/groq.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index 69e037299..23887469f 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index ecfe2a167..c58c0921d 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/llama_api/llama_api.py b/llama_stack/templates/llama_api/llama_api.py index b4641b9da..723cc44a3 100644 --- a/llama_stack/templates/llama_api/llama_api.py +++ b/llama_stack/templates/llama_api/llama_api.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 95d126095..57fb8f2af 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 46c4852a4..cba25296b 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py index d944d4eff..f0738ae5b 100644 --- a/llama_stack/templates/open-benchmark/open_benchmark.py +++ b/llama_stack/templates/open-benchmark/open_benchmark.py @@ -6,7 +6,7 @@ from llama_stack.apis.datasets import DatasetPurpose, URIDataSource -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( BenchmarkInput, DatasetInput, diff --git a/llama_stack/templates/passthrough/passthrough.py b/llama_stack/templates/passthrough/passthrough.py index 6a30625c5..1b94a9aae 100644 --- a/llama_stack/templates/passthrough/passthrough.py +++ b/llama_stack/templates/passthrough/passthrough.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index 759281567..a1a2aa2b7 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index 2782a3ea0..94606e9d0 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index 54a49423d..38df6a4be 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index ec01d08e9..8e111e80a 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 712d2dcb4..7badff140 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -13,7 +13,7 @@ import yaml from pydantic import BaseModel, Field from llama_stack.apis.datasets import DatasetPurpose -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( Api, BenchmarkInput, diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index 2c97cbf80..394cde18e 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index 7761bd9fd..4c64ff3cd 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py index 5775138b1..443fcd7a3 100644 --- a/llama_stack/templates/vllm-gpu/vllm.py +++ b/llama_stack/templates/vllm-gpu/vllm.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/watsonx/watsonx.py b/llama_stack/templates/watsonx/watsonx.py index 802aaf8f1..7fa3a55e5 100644 --- a/llama_stack/templates/watsonx/watsonx.py +++ b/llama_stack/templates/watsonx/watsonx.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/pyproject.toml b/pyproject.toml index 968a3ae60..97624fade 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -204,6 +204,9 @@ unfixable = [ "RUF001", "PLE2515", ] +"llama_stack/apis/**/__init__.py" = [ + "F403", +] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API [tool.mypy] mypy_path = ["llama_stack"] diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 9cbdc8e51..0eeb68167 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -13,7 +13,7 @@ import pytest from llama_stack.apis.common.type_system import NumberType from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource from llama_stack.apis.datatypes import Api -from llama_stack.apis.models.models import Model, ModelType +from llama_stack.apis.models import Model, ModelType from llama_stack.apis.shields.shields import Shield from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup, ToolParameter from llama_stack.apis.vector_dbs.vector_dbs import VectorDB diff --git a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py index e112bb6e5..2ebcd9970 100644 --- a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py +++ b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py @@ -8,7 +8,7 @@ import os import yaml -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, ) diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 7772dd2cc..0d1ef8eca 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -29,7 +29,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseTextFormat, WebSearchToolTypes, ) -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartTextParam, OpenAIDeveloperMessageParam, diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index 8c74f178b..73fc32a02 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -11,7 +11,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from llama_stack.apis.inference.inference import CompletionMessage, UserMessage +from 
llama_stack.apis.inference import CompletionMessage, UserMessage from llama_stack.apis.safety import RunShieldResponse, ViolationLevel from llama_stack.apis.shields import Shield from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig diff --git a/tests/unit/providers/utils/inference/test_openai_compat.py b/tests/unit/providers/utils/inference/test_openai_compat.py index 4c75b8a2f..3598e4810 100644 --- a/tests/unit/providers/utils/inference/test_openai_compat.py +++ b/tests/unit/providers/utils/inference/test_openai_compat.py @@ -7,7 +7,7 @@ import pytest from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( CompletionMessage, OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartTextParam, diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index 67f8a138f..10fa1e075 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -35,7 +35,7 @@ import pytest -from llama_stack.apis.models.models import Model +from llama_stack.apis.models import Model from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
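For readers unfamiliar with the re-export pattern this PR leans on: because each API package's `__init__.py` does `from .<module> import *`, every public name defined in the submodule is also bound on the package itself, which is why the nested `.models.models`-style imports above can be flattened. A minimal, self-contained sketch of that mechanism follows; the `demo_api` package and its contents are hypothetical stand-ins, not part of this tree.

```python
# Hypothetical stand-in for llama_stack/apis/models: builds a tiny package
# on disk, then shows that the flat and nested imports bind the same object.
import sys
import tempfile
from pathlib import Path

root = Path(tempfile.mkdtemp())
pkg = root / "demo_api"
pkg.mkdir()

# Submodule, playing the role of llama_stack/apis/models/models.py
(pkg / "models.py").write_text(
    "__all__ = ['ModelType']\n"  # __all__ controls what `import *` re-exports
    "class ModelType:\n"
    "    llm = 'llm'\n"
)
# Package __init__.py, mirroring the post-PR llama_stack/apis/models/__init__.py
(pkg / "__init__.py").write_text("from .models import *  # noqa: F403\n")

sys.path.insert(0, str(root))
from demo_api import ModelType                    # flat import, kept by this PR
from demo_api.models import ModelType as Nested   # nested import, removed by this PR
assert ModelType is Nested  # same class either way, so the nested form is redundant
```

Under that assumption, the per-file ignore added to pyproject.toml in this PR (F403 for `llama_stack/apis/**/__init__.py`) is what keeps the star imports lint-clean without the per-line `# noqa: F401 F403` comments the diff removes.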