From e9b8259cf948622fcae99ca664ba37e33a82afd5 Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Wed, 19 Feb 2025 21:39:20 -0500
Subject: [PATCH] fix: Get distro_codegen.py working with default deps and enabled in pre-commit hooks (#1123)

# What does this PR do?

Before this change, `distro_codegen.py` would only work if the user manually installed multiple provider-specific dependencies (see #1122). Now, users can run `distro_codegen.py` without any provider-specific dependencies because we avoid importing the entire provider implementations just to get the config needed to build the provider template.

Concretely, this mostly means moving the MODEL_ALIASES (and related variants) definitions to a new models.py module within the provider implementation for those providers that require additional dependencies. It also means moving a couple of imports from the top level to inside `get_adapter_impl` for some providers, which follows the pattern already used by multiple existing providers.

To ensure we don't regress and accidentally add new imports that cause distro_codegen.py to fail, the stubbed-in pre-commit hook for distro_codegen.py was uncommented and slightly tweaked: it now runs via `uv run python ...` so that it uses only the project's default dependencies, and it runs automatically instead of manually.

Lastly, this updates distro_codegen.py itself to keep track of the paths it may have changed and to only `git diff` those specific paths when checking for changed files, instead of diffing the entire working tree. The latter was overly broad and required a user to have no other unstaged changes in their working tree, even if those unstaged changes were unrelated to generated code. Now it only flags uncommitted changes for paths distro_codegen.py actually writes to.

Our generated code was also out-of-date, presumably because of these issues, so this commit also includes some updates to the generated code purely because it was out of sync, and the pre-commit hook now enforces that it stays updated.

(Closes #1122)

## Test Plan

I manually tested distro_codegen.py and the pre-commit hook to verify that both work as expected, flagging any uncommitted changes and catching any imports that attempt to pull in provider-specific dependencies. However, I do not have valid API keys for the impacted provider implementations, and am unable to easily run the inference tests against each changed provider.

There are no functional changes to the provider implementations here, but I'd appreciate a second set of eyes on the changed import statements and on the moving of the MODEL_ALIASES-style definitions to a separate models.py to ensure I didn't make any obvious errors.
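For reviewers who want the shape of the refactor before reading the full diff, here is a condensed sketch of the two patterns described above, using the SambaNova provider as the example. This is purely illustrative; the authoritative code is in the hunks below, and the elided aliases follow the same form.

```python
# llama_stack/providers/remote/inference/sambanova/models.py (condensed)
# Pure-data module: importable with only the project's default dependencies.
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import build_model_alias

MODEL_ALIASES = [
    build_model_alias(
        "Meta-Llama-3.1-8B-Instruct",
        CoreModelId.llama3_1_8b_instruct.value,
    ),
    # ... remaining aliases elided; see the diff below ...
]


# llama_stack/providers/remote/inference/sambanova/__init__.py (condensed)
# The adapter import moves inside get_adapter_impl so that merely importing
# the package (as distro_codegen.py does) never pulls in provider-specific
# dependencies.
from .config import SambaNovaImplConfig


async def get_adapter_impl(config: SambaNovaImplConfig, _deps):
    from .sambanova import SambaNovaInferenceAdapter  # deferred, dependency-heavy import

    assert isinstance(config, SambaNovaImplConfig), f"Unexpected config type: {type(config)}"
    impl = SambaNovaInferenceAdapter(config)
    await impl.initialize()
    return impl
```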
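Similarly, the narrowed change check boils down to the following, condensed from the distro_codegen.py hunk below (error handling and the progress/template wiring are omitted here):

```python
# Condensed from llama_stack/scripts/distro_codegen.py: only paths the script
# actually wrote to (recorded via ChangedPathTracker.add_paths) are diffed.
import subprocess
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).parent.parent.parent


class ChangedPathTracker:
    """Track a list of paths we may have changed."""

    def __init__(self):
        self._changed_paths = []

    def add_paths(self, *paths):
        for path in paths:
            path = str(path)
            if path not in self._changed_paths:
                self._changed_paths.append(path)

    def changed_paths(self):
        return self._changed_paths


def check_for_changes(change_tracker: ChangedPathTracker) -> bool:
    """Return True if any tracked path has uncommitted changes."""
    has_changes = False
    for path in change_tracker.changed_paths():
        result = subprocess.run(
            ["git", "diff", "--exit-code", path],
            cwd=REPO_ROOT,
            capture_output=True,
        )
        if result.returncode != 0:
            print(f"Change detected in '{path}'.", file=sys.stderr)
            has_changes = True
    return has_changes
```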
--------- Signed-off-by: Ben Browning Co-authored-by: Ashwin Bharambe --- .pre-commit-config.yaml | 26 ++++----- .../self_hosted_distro/bedrock.md | 6 ++- .../inline/tool_runtime/rag/__init__.py | 3 +- .../remote/inference/bedrock/bedrock.py | 17 +----- .../remote/inference/bedrock/models.py | 25 +++++++++ .../remote/inference/cerebras/cerebras.py | 15 +----- .../remote/inference/cerebras/models.py | 21 ++++++++ .../remote/inference/fireworks/fireworks.py | 46 +--------------- .../remote/inference/fireworks/models.py | 53 ++++++++++++++++++ .../remote/inference/nvidia/models.py | 51 ++++++++++++++++++ .../remote/inference/nvidia/nvidia.py | 53 +----------------- .../remote/inference/sambanova/__init__.py | 3 +- .../remote/inference/sambanova/models.py | 49 +++++++++++++++++ .../remote/inference/sambanova/sambanova.py | 42 +-------------- .../remote/inference/tgi/__init__.py | 3 +- .../remote/inference/together/models.py | 49 +++++++++++++++++ .../remote/inference/together/together.py | 42 +-------------- .../model_context_protocol/__init__.py | 3 +- llama_stack/scripts/distro_codegen.py | 54 ++++++++++++++----- llama_stack/templates/bedrock/bedrock.py | 2 +- llama_stack/templates/cerebras/cerebras.py | 2 +- llama_stack/templates/fireworks/fireworks.py | 2 +- llama_stack/templates/nvidia/nvidia.py | 2 +- llama_stack/templates/ollama/build.yaml | 1 - .../templates/ollama/run-with-safety.yaml | 7 +++ llama_stack/templates/ollama/run.yaml | 1 - llama_stack/templates/sambanova/sambanova.py | 2 +- llama_stack/templates/together/together.py | 2 +- 28 files changed, 334 insertions(+), 248 deletions(-) create mode 100644 llama_stack/providers/remote/inference/bedrock/models.py create mode 100644 llama_stack/providers/remote/inference/cerebras/models.py create mode 100644 llama_stack/providers/remote/inference/fireworks/models.py create mode 100644 llama_stack/providers/remote/inference/nvidia/models.py create mode 100644 llama_stack/providers/remote/inference/sambanova/models.py create mode 100644 llama_stack/providers/remote/inference/together/models.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9b8b9a8df..8c5510b27 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -75,19 +75,19 @@ repos: # - id: markdown-link-check # args: ['--quiet'] -# - repo: local -# hooks: -# - id: distro-codegen -# name: Distribution Template Codegen -# additional_dependencies: -# - rich -# - pydantic -# entry: python -m llama_stack.scripts.distro_codegen -# language: python -# pass_filenames: false -# require_serial: true -# files: ^llama_stack/templates/.*$ -# stages: [manual] +- repo: local + hooks: + - id: distro-codegen + name: Distribution Template Codegen + additional_dependencies: + - rich + - pydantic + - uv==0.6.0 + entry: uv run python -m llama_stack.scripts.distro_codegen + language: python + pass_filenames: false + require_serial: true + files: ^llama_stack/templates/.*$ ci: autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md index 64c9f8c19..14f004926 100644 --- a/docs/source/distributions/self_hosted_distro/bedrock.md +++ b/docs/source/distributions/self_hosted_distro/bedrock.md @@ -61,7 +61,8 @@ docker run \ --port $LLAMA_STACK_PORT \ --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ - --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN + --env 
AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \ + --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION ``` ### Via Conda @@ -72,5 +73,6 @@ llama stack run ./run.yaml \ --port $LLAMA_STACK_PORT \ --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ - --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN + --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \ + --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION ``` diff --git a/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/llama_stack/providers/inline/tool_runtime/rag/__init__.py index 542872091..15118c9df 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/__init__.py +++ b/llama_stack/providers/inline/tool_runtime/rag/__init__.py @@ -9,10 +9,11 @@ from typing import Any, Dict from llama_stack.providers.datatypes import Api from .config import RagToolRuntimeConfig -from .memory import MemoryToolRuntimeImpl async def get_provider_impl(config: RagToolRuntimeConfig, deps: Dict[str, Any]): + from .memory import MemoryToolRuntimeImpl + impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference]) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py index e896f0597..a706d4304 100644 --- a/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -27,12 +27,10 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig from llama_stack.providers.utils.bedrock.client import create_bedrock_client from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionChoice, @@ -47,20 +45,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) -MODEL_ALIASES = [ - build_model_alias( - "meta.llama3-1-8b-instruct-v1:0", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_model_alias( - "meta.llama3-1-70b-instruct-v1:0", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_model_alias( - "meta.llama3-1-405b-instruct-v1:0", - CoreModelId.llama3_1_405b_instruct.value, - ), -] +from .models import MODEL_ALIASES class BedrockInferenceAdapter(ModelRegistryHelper, Inference): diff --git a/llama_stack/providers/remote/inference/bedrock/models.py b/llama_stack/providers/remote/inference/bedrock/models.py new file mode 100644 index 000000000..b629e05d5 --- /dev/null +++ b/llama_stack/providers/remote/inference/bedrock/models.py @@ -0,0 +1,25 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.models.llama.datatypes import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + build_model_alias, +) + +MODEL_ALIASES = [ + build_model_alias( + "meta.llama3-1-8b-instruct-v1:0", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_model_alias( + "meta.llama3-1-70b-instruct-v1:0", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_model_alias( + "meta.llama3-1-405b-instruct-v1:0", + CoreModelId.llama3_1_405b_instruct.value, + ), +] diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py index 1ce267e8d..0d8824fd2 100644 --- a/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -26,10 +26,9 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.models.llama.datatypes import CoreModelId, TopKSamplingStrategy +from llama_stack.models.llama.datatypes import TopKSamplingStrategy from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( get_sampling_options, @@ -44,17 +43,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( ) from .config import CerebrasImplConfig - -model_aliases = [ - build_model_alias( - "llama3.1-8b", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_model_alias( - "llama-3.3-70b", - CoreModelId.llama3_3_70b_instruct.value, - ), -] +from .models import model_aliases class CerebrasInferenceAdapter(ModelRegistryHelper, Inference): diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py new file mode 100644 index 000000000..03ffeb492 --- /dev/null +++ b/llama_stack/providers/remote/inference/cerebras/models.py @@ -0,0 +1,21 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.models.llama.datatypes import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + build_model_alias, +) + +model_aliases = [ + build_model_alias( + "llama3.1-8b", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_model_alias( + "llama-3.3-70b", + CoreModelId.llama3_3_70b_instruct.value, + ), +] diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index acf37b248..3b834673d 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -29,10 +29,8 @@ from llama_stack.apis.inference import ( ToolPromptFormat, ) from llama_stack.distribution.request_headers import NeedsRequestProviderData -from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( convert_message_to_openai_dict, @@ -51,49 +49,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( ) from .config import FireworksImplConfig - -MODEL_ALIASES = [ - build_model_alias( - "accounts/fireworks/models/llama-v3p1-8b-instruct", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_model_alias( - "accounts/fireworks/models/llama-v3p1-70b-instruct", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_model_alias( - "accounts/fireworks/models/llama-v3p1-405b-instruct", - CoreModelId.llama3_1_405b_instruct.value, - ), - build_model_alias( - "accounts/fireworks/models/llama-v3p2-1b-instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), - build_model_alias( - "accounts/fireworks/models/llama-v3p2-3b-instruct", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_model_alias( - "accounts/fireworks/models/llama-v3p2-11b-vision-instruct", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_model_alias( - "accounts/fireworks/models/llama-v3p2-90b-vision-instruct", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - build_model_alias( - "accounts/fireworks/models/llama-v3p3-70b-instruct", - CoreModelId.llama3_3_70b_instruct.value, - ), - build_model_alias( - "accounts/fireworks/models/llama-guard-3-8b", - CoreModelId.llama_guard_3_8b.value, - ), - build_model_alias( - "accounts/fireworks/models/llama-guard-3-11b-vision", - CoreModelId.llama_guard_3_11b_vision.value, - ), -] +from .models import MODEL_ALIASES class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData): diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py new file mode 100644 index 000000000..14de585d4 --- /dev/null +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -0,0 +1,53 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.models.llama.datatypes import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + build_model_alias, +) + +MODEL_ALIASES = [ + build_model_alias( + "accounts/fireworks/models/llama-v3p1-8b-instruct", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_model_alias( + "accounts/fireworks/models/llama-v3p1-70b-instruct", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_model_alias( + "accounts/fireworks/models/llama-v3p1-405b-instruct", + CoreModelId.llama3_1_405b_instruct.value, + ), + build_model_alias( + "accounts/fireworks/models/llama-v3p2-1b-instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), + build_model_alias( + "accounts/fireworks/models/llama-v3p2-3b-instruct", + CoreModelId.llama3_2_3b_instruct.value, + ), + build_model_alias( + "accounts/fireworks/models/llama-v3p2-11b-vision-instruct", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), + build_model_alias( + "accounts/fireworks/models/llama-v3p2-90b-vision-instruct", + CoreModelId.llama3_2_90b_vision_instruct.value, + ), + build_model_alias( + "accounts/fireworks/models/llama-v3p3-70b-instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), + build_model_alias( + "accounts/fireworks/models/llama-guard-3-8b", + CoreModelId.llama_guard_3_8b.value, + ), + build_model_alias( + "accounts/fireworks/models/llama-guard-3-11b-vision", + CoreModelId.llama_guard_3_11b_vision.value, + ), +] diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py new file mode 100644 index 000000000..1d9b575d4 --- /dev/null +++ b/llama_stack/providers/remote/inference/nvidia/models.py @@ -0,0 +1,51 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.models.llama.datatypes import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + build_model_alias, +) + +_MODEL_ALIASES = [ + build_model_alias( + "meta/llama3-8b-instruct", + CoreModelId.llama3_8b_instruct.value, + ), + build_model_alias( + "meta/llama3-70b-instruct", + CoreModelId.llama3_70b_instruct.value, + ), + build_model_alias( + "meta/llama-3.1-8b-instruct", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_model_alias( + "meta/llama-3.1-70b-instruct", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_model_alias( + "meta/llama-3.1-405b-instruct", + CoreModelId.llama3_1_405b_instruct.value, + ), + build_model_alias( + "meta/llama-3.2-1b-instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), + build_model_alias( + "meta/llama-3.2-3b-instruct", + CoreModelId.llama3_2_3b_instruct.value, + ), + build_model_alias( + "meta/llama-3.2-11b-vision-instruct", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), + build_model_alias( + "meta/llama-3.2-90b-vision-instruct", + CoreModelId.llama3_2_90b_vision_instruct.value, + ), + # TODO(mf): how do we handle Nemotron models? 
+ # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct", +] diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 8e67333af..0da617858 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging import warnings from typing import AsyncIterator, List, Optional, Union @@ -26,19 +25,14 @@ from llama_stack.apis.inference import ( ToolChoice, ToolConfig, ) -from llama_stack.models.llama.datatypes import ( - CoreModelId, - SamplingParams, - ToolDefinition, - ToolPromptFormat, -) +from llama_stack.models.llama.datatypes import SamplingParams, ToolDefinition, ToolPromptFormat from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, ) from llama_stack.providers.utils.inference.prompt_adapter import content_has_media from . import NVIDIAConfig +from .models import _MODEL_ALIASES from .openai_utils import ( convert_chat_completion_request, convert_completion_request, @@ -49,49 +43,6 @@ from .openai_utils import ( ) from .utils import _is_nvidia_hosted, check_health -logger = logging.getLogger(__name__) - -_MODEL_ALIASES = [ - build_model_alias( - "meta/llama3-8b-instruct", - CoreModelId.llama3_8b_instruct.value, - ), - build_model_alias( - "meta/llama3-70b-instruct", - CoreModelId.llama3_70b_instruct.value, - ), - build_model_alias( - "meta/llama-3.1-8b-instruct", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_model_alias( - "meta/llama-3.1-70b-instruct", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_model_alias( - "meta/llama-3.1-405b-instruct", - CoreModelId.llama3_1_405b_instruct.value, - ), - build_model_alias( - "meta/llama-3.2-1b-instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), - build_model_alias( - "meta/llama-3.2-3b-instruct", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_model_alias( - "meta/llama-3.2-11b-vision-instruct", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_model_alias( - "meta/llama-3.2-90b-vision-instruct", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - # TODO(mf): how do we handle Nemotron models? 
- # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct", -] - class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): def __init__(self, config: NVIDIAConfig) -> None: diff --git a/llama_stack/providers/remote/inference/sambanova/__init__.py b/llama_stack/providers/remote/inference/sambanova/__init__.py index ccf4bf1cb..3e682e69c 100644 --- a/llama_stack/providers/remote/inference/sambanova/__init__.py +++ b/llama_stack/providers/remote/inference/sambanova/__init__.py @@ -7,7 +7,6 @@ from pydantic import BaseModel from .config import SambaNovaImplConfig -from .sambanova import SambaNovaInferenceAdapter class SambaNovaProviderDataValidator(BaseModel): @@ -15,6 +14,8 @@ class SambaNovaProviderDataValidator(BaseModel): async def get_adapter_impl(config: SambaNovaImplConfig, _deps): + from .sambanova import SambaNovaInferenceAdapter + assert isinstance(config, SambaNovaImplConfig), f"Unexpected config type: {type(config)}" impl = SambaNovaInferenceAdapter(config) await impl.initialize() diff --git a/llama_stack/providers/remote/inference/sambanova/models.py b/llama_stack/providers/remote/inference/sambanova/models.py new file mode 100644 index 000000000..27a4a149e --- /dev/null +++ b/llama_stack/providers/remote/inference/sambanova/models.py @@ -0,0 +1,49 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.models.llama.datatypes import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + build_model_alias, +) + +MODEL_ALIASES = [ + build_model_alias( + "Meta-Llama-3.1-8B-Instruct", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.1-70B-Instruct", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.1-405B-Instruct", + CoreModelId.llama3_1_405b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.2-1B-Instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.2-3B-Instruct", + CoreModelId.llama3_2_3b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.3-70B-Instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), + build_model_alias( + "Llama-3.2-11B-Vision-Instruct", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), + build_model_alias( + "Llama-3.2-90B-Vision-Instruct", + CoreModelId.llama3_2_90b_vision_instruct.value, + ), + build_model_alias( + "Meta-Llama-Guard-3-8B", + CoreModelId.llama_guard_3_8b.value, + ), +] diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py index b906e0dcb..9b3562870 100644 --- a/llama_stack/providers/remote/inference/sambanova/sambanova.py +++ b/llama_stack/providers/remote/inference/sambanova/sambanova.py @@ -18,14 +18,12 @@ from llama_stack.apis.common.content_types import ( ) from llama_stack.apis.inference import * # noqa: F403 from llama_stack.models.llama.datatypes import ( - CoreModelId, GreedySamplingStrategy, TopKSamplingStrategy, TopPSamplingStrategy, ) from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( process_chat_completion_stream_response, @@ -35,45 +33,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( ) from .config import SambaNovaImplConfig - -MODEL_ALIASES = [ - 
build_model_alias( - "Meta-Llama-3.1-8B-Instruct", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.1-70B-Instruct", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.1-405B-Instruct", - CoreModelId.llama3_1_405b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.2-1B-Instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.2-3B-Instruct", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.3-70B-Instruct", - CoreModelId.llama3_3_70b_instruct.value, - ), - build_model_alias( - "Llama-3.2-11B-Vision-Instruct", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_model_alias( - "Llama-3.2-90B-Vision-Instruct", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - build_model_alias( - "Meta-Llama-Guard-3-8B", - CoreModelId.llama_guard_3_8b.value, - ), -] +from .models import MODEL_ALIASES class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference): diff --git a/llama_stack/providers/remote/inference/tgi/__init__.py b/llama_stack/providers/remote/inference/tgi/__init__.py index 451650323..834e51324 100644 --- a/llama_stack/providers/remote/inference/tgi/__init__.py +++ b/llama_stack/providers/remote/inference/tgi/__init__.py @@ -7,13 +7,14 @@ from typing import Union from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig -from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter async def get_adapter_impl( config: Union[InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig], _deps, ): + from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter + if isinstance(config, TGIImplConfig): impl = TGIAdapter() elif isinstance(config, InferenceAPIImplConfig): diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py new file mode 100644 index 000000000..87d282ea5 --- /dev/null +++ b/llama_stack/providers/remote/inference/together/models.py @@ -0,0 +1,49 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.models.llama.datatypes import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + build_model_alias, +) + +MODEL_ALIASES = [ + build_model_alias( + "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_model_alias( + "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_model_alias( + "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", + CoreModelId.llama3_1_405b_instruct.value, + ), + build_model_alias( + "meta-llama/Llama-3.2-3B-Instruct-Turbo", + CoreModelId.llama3_2_3b_instruct.value, + ), + build_model_alias( + "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), + build_model_alias( + "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo", + CoreModelId.llama3_2_90b_vision_instruct.value, + ), + build_model_alias( + "meta-llama/Llama-3.3-70B-Instruct-Turbo", + CoreModelId.llama3_3_70b_instruct.value, + ), + build_model_alias( + "meta-llama/Meta-Llama-Guard-3-8B", + CoreModelId.llama_guard_3_8b.value, + ), + build_model_alias( + "meta-llama/Llama-Guard-3-11B-Vision-Turbo", + CoreModelId.llama_guard_3_11b_vision.value, + ), +] diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index 054501da8..7a37ff616 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -28,10 +28,8 @@ from llama_stack.apis.inference import ( ToolPromptFormat, ) from llama_stack.distribution.request_headers import NeedsRequestProviderData -from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( convert_message_to_openai_dict, @@ -50,45 +48,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( ) from .config import TogetherImplConfig - -MODEL_ALIASES = [ - build_model_alias( - "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_model_alias( - "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_model_alias( - "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", - CoreModelId.llama3_1_405b_instruct.value, - ), - build_model_alias( - "meta-llama/Llama-3.2-3B-Instruct-Turbo", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_model_alias( - "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_model_alias( - "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - build_model_alias( - "meta-llama/Llama-3.3-70B-Instruct-Turbo", - CoreModelId.llama3_3_70b_instruct.value, - ), - build_model_alias( - "meta-llama/Meta-Llama-Guard-3-8B", - CoreModelId.llama_guard_3_8b.value, - ), - build_model_alias( - "meta-llama/Llama-Guard-3-11B-Vision-Turbo", - CoreModelId.llama_guard_3_11b_vision.value, - ), -] +from .models import MODEL_ALIASES class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData): diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py b/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py index 2ddf7b4fe..fb1f558e5 100644 --- 
a/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +++ b/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py @@ -7,7 +7,6 @@ from pydantic import BaseModel from .config import ModelContextProtocolConfig -from .model_context_protocol import ModelContextProtocolToolRuntimeImpl class ModelContextProtocolToolProviderDataValidator(BaseModel): @@ -15,6 +14,8 @@ class ModelContextProtocolToolProviderDataValidator(BaseModel): async def get_adapter_impl(config: ModelContextProtocolConfig, _deps): + from .model_context_protocol import ModelContextProtocolToolRuntimeImpl + impl = ModelContextProtocolToolRuntimeImpl(config) await impl.initialize() return impl diff --git a/llama_stack/scripts/distro_codegen.py b/llama_stack/scripts/distro_codegen.py index 825a039ef..1c44b4625 100644 --- a/llama_stack/scripts/distro_codegen.py +++ b/llama_stack/scripts/distro_codegen.py @@ -23,6 +23,22 @@ from llama_stack.distribution.build import ( REPO_ROOT = Path(__file__).parent.parent.parent +class ChangedPathTracker: + """Track a list of paths we may have changed.""" + + def __init__(self): + self._changed_paths = [] + + def add_paths(self, *paths): + for path in paths: + path = str(path) + if path not in self._changed_paths: + self._changed_paths.append(path) + + def changed_paths(self): + return self._changed_paths + + def find_template_dirs(templates_dir: Path) -> Iterator[Path]: """Find immediate subdirectories in the templates folder.""" if not templates_dir.exists(): @@ -31,7 +47,7 @@ def find_template_dirs(templates_dir: Path) -> Iterator[Path]: return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__") -def process_template(template_dir: Path, progress) -> None: +def process_template(template_dir: Path, progress, change_tracker: ChangedPathTracker) -> None: """Process a single template directory.""" progress.print(f"Processing {template_dir.name}") @@ -44,9 +60,12 @@ def process_template(template_dir: Path, progress) -> None: if template_func := getattr(module, "get_distribution_template", None): template = template_func() + yaml_output_dir = REPO_ROOT / "llama_stack" / "templates" / template.name + doc_output_dir = REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro" + change_tracker.add_paths(yaml_output_dir, doc_output_dir) template.save_distribution( - yaml_output_dir=REPO_ROOT / "llama_stack" / "templates" / template.name, - doc_output_dir=REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro", + yaml_output_dir=yaml_output_dir, + doc_output_dir=doc_output_dir, ) else: progress.print(f"[yellow]Warning: {template_dir.name} has no get_distribution_template function") @@ -56,14 +75,19 @@ def process_template(template_dir: Path, progress) -> None: raise e -def check_for_changes() -> bool: +def check_for_changes(change_tracker: ChangedPathTracker) -> bool: """Check if there are any uncommitted changes.""" - result = subprocess.run( - ["git", "diff", "--exit-code"], - cwd=REPO_ROOT, - capture_output=True, - ) - return result.returncode != 0 + has_changes = False + for path in change_tracker.changed_paths(): + result = subprocess.run( + ["git", "diff", "--exit-code", path], + cwd=REPO_ROOT, + capture_output=True, + ) + if result.returncode != 0: + print(f"Change detected in '{path}'.", file=sys.stderr) + has_changes = True + return has_changes def collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]: @@ -83,7 +107,7 @@ def 
collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]: return None, [] -def generate_dependencies_file(): +def generate_dependencies_file(change_tracker: ChangedPathTracker): templates_dir = REPO_ROOT / "llama_stack" / "templates" distribution_deps = {} @@ -93,12 +117,14 @@ def generate_dependencies_file(): distribution_deps[name] = deps deps_file = REPO_ROOT / "distributions" / "dependencies.json" + change_tracker.add_paths(deps_file) with open(deps_file, "w") as f: f.write(json.dumps(distribution_deps, indent=2) + "\n") def main(): templates_dir = REPO_ROOT / "llama_stack" / "templates" + change_tracker = ChangedPathTracker() with Progress( SpinnerColumn(), @@ -108,7 +134,7 @@ def main(): task = progress.add_task("Processing distribution templates...", total=len(template_dirs)) # Create a partial function with the progress bar - process_func = partial(process_template, progress=progress) + process_func = partial(process_template, progress=progress, change_tracker=change_tracker) # Process templates in parallel with concurrent.futures.ThreadPoolExecutor() as executor: @@ -116,9 +142,9 @@ def main(): list(executor.map(process_func, template_dirs)) progress.update(task, advance=len(template_dirs)) - generate_dependencies_file() + generate_dependencies_file(change_tracker) - if check_for_changes(): + if check_for_changes(change_tracker): print( "Distribution template changes detected. Please commit the changes.", file=sys.stderr, diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py index 0b294824d..550269f61 100644 --- a/llama_stack/templates/bedrock/bedrock.py +++ b/llama_stack/templates/bedrock/bedrock.py @@ -10,7 +10,7 @@ from llama_stack.apis.models import ModelInput from llama_stack.distribution.datatypes import Provider, ToolGroupInput from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES +from llama_stack.providers.remote.inference.bedrock.models import MODEL_ALIASES from llama_stack.templates.template import DistributionTemplate, RunConfigSettings diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index 4f6d0c8f3..5f3921102 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -14,7 +14,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import ( ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig -from llama_stack.providers.remote.inference.cerebras.cerebras import model_aliases +from llama_stack.providers.remote.inference.cerebras.models import model_aliases from llama_stack.templates.template import DistributionTemplate, RunConfigSettings diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index a6809fef6..8d91c223d 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import ( ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig -from llama_stack.providers.remote.inference.fireworks.fireworks import 
MODEL_ALIASES +from llama_stack.providers.remote.inference.fireworks.models import MODEL_ALIASES from llama_stack.templates.template import DistributionTemplate, RunConfigSettings diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index ee22b5555..6bca48e99 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -9,7 +9,7 @@ from pathlib import Path from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig -from llama_stack.providers.remote.inference.nvidia.nvidia import _MODEL_ALIASES +from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ALIASES from llama_stack.templates.template import DistributionTemplate, RunConfigSettings diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index 48960c5ba..0fee6808c 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -6,7 +6,6 @@ distribution_spec: - remote::ollama vector_io: - inline::faiss - - inline::sqlite_vec - remote::chromadb - remote::pgvector safety: diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 9d5bfc7a0..4ce64cf59 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -20,6 +20,13 @@ providers: provider_type: inline::sentence-transformers config: {} vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db - provider_id: faiss provider_type: inline::faiss config: diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 9ac1f3267..b4982f8e2 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -34,7 +34,6 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db safety: - provider_id: llama-guard provider_type: inline::llama-guard diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index c7a9428af..dac7346a7 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -14,7 +14,7 @@ from llama_stack.distribution.datatypes import ( ) from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig -from llama_stack.providers.remote.inference.sambanova.sambanova import MODEL_ALIASES +from llama_stack.providers.remote.inference.sambanova.models import MODEL_ALIASES from llama_stack.templates.template import DistributionTemplate, RunConfigSettings diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index f7b18e32a..ef6847fb2 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import ( ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.together 
import TogetherImplConfig -from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES +from llama_stack.providers.remote.inference.together.models import MODEL_ALIASES from llama_stack.templates.template import DistributionTemplate, RunConfigSettings