diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9b8b9a8df..8c5510b27 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -75,19 +75,19 @@ repos:
 #       - id: markdown-link-check
 #         args: ['--quiet']
 
-# - repo: local
-#   hooks:
-#     - id: distro-codegen
-#       name: Distribution Template Codegen
-#       additional_dependencies:
-#         - rich
-#         - pydantic
-#       entry: python -m llama_stack.scripts.distro_codegen
-#       language: python
-#       pass_filenames: false
-#       require_serial: true
-#       files: ^llama_stack/templates/.*$
-#       stages: [manual]
+- repo: local
+  hooks:
+    - id: distro-codegen
+      name: Distribution Template Codegen
+      additional_dependencies:
+        - rich
+        - pydantic
+        - uv==0.6.0
+      entry: uv run python -m llama_stack.scripts.distro_codegen
+      language: python
+      pass_filenames: false
+      require_serial: true
+      files: ^llama_stack/templates/.*$
 
 ci:
     autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md
index 64c9f8c19..14f004926 100644
--- a/docs/source/distributions/self_hosted_distro/bedrock.md
+++ b/docs/source/distributions/self_hosted_distro/bedrock.md
@@ -61,7 +61,8 @@ docker run \
   --port $LLAMA_STACK_PORT \
   --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
   --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN
+  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
+  --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION
 ```
 
 ### Via Conda
@@ -72,5 +73,6 @@ llama stack run ./run.yaml \
   --port $LLAMA_STACK_PORT \
   --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
   --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN
+  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
+  --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION
 ```
diff --git a/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/llama_stack/providers/inline/tool_runtime/rag/__init__.py
index 542872091..15118c9df 100644
--- a/llama_stack/providers/inline/tool_runtime/rag/__init__.py
+++ b/llama_stack/providers/inline/tool_runtime/rag/__init__.py
@@ -9,10 +9,11 @@ from typing import Any, Dict
 from llama_stack.providers.datatypes import Api
 
 from .config import RagToolRuntimeConfig
-from .memory import MemoryToolRuntimeImpl
 
 
 async def get_provider_impl(config: RagToolRuntimeConfig, deps: Dict[str, Any]):
+    from .memory import MemoryToolRuntimeImpl
+
     impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference])
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py
index e896f0597..a706d4304 100644
--- a/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -27,12 +27,10 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
 from llama_stack.providers.utils.bedrock.client import create_bedrock_client
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
@@ -47,20 +45,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     interleaved_content_as_str,
 )
 
-MODEL_ALIASES = [
-    build_model_alias(
-        "meta.llama3-1-8b-instruct-v1:0",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "meta.llama3-1-70b-instruct-v1:0",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta.llama3-1-405b-instruct-v1:0",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class BedrockInferenceAdapter(ModelRegistryHelper, Inference):
diff --git a/llama_stack/providers/remote/inference/bedrock/models.py b/llama_stack/providers/remote/inference/bedrock/models.py
new file mode 100644
index 000000000..b629e05d5
--- /dev/null
+++ b/llama_stack/providers/remote/inference/bedrock/models.py
@@ -0,0 +1,25 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "meta.llama3-1-8b-instruct-v1:0",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta.llama3-1-70b-instruct-v1:0",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta.llama3-1-405b-instruct-v1:0",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+]
diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py
index 1ce267e8d..0d8824fd2 100644
--- a/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -26,10 +26,9 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.models.llama.datatypes import CoreModelId, TopKSamplingStrategy
+from llama_stack.models.llama.datatypes import TopKSamplingStrategy
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     get_sampling_options,
@@ -44,17 +43,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import CerebrasImplConfig
-
-model_aliases = [
-    build_model_alias(
-        "llama3.1-8b",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "llama-3.3-70b",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-]
+from .models import model_aliases
 
 
 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py
new file mode 100644
index 000000000..03ffeb492
--- /dev/null
+++ b/llama_stack/providers/remote/inference/cerebras/models.py
@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+model_aliases = [
+    build_model_alias(
+        "llama3.1-8b",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "llama-3.3-70b",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+]
diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py
index acf37b248..3b834673d 100644
--- a/llama_stack/providers/remote/inference/fireworks/fireworks.py
+++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -29,10 +29,8 @@ from llama_stack.apis.inference import (
     ToolPromptFormat,
 )
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
-from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     convert_message_to_openai_dict,
@@ -51,49 +49,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import FireworksImplConfig
-
-MODEL_ALIASES = [
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p1-8b-instruct",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p1-70b-instruct",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p1-405b-instruct",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-1b-instruct",
-        CoreModelId.llama3_2_1b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-3b-instruct",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p3-70b-instruct",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-guard-3-8b",
-        CoreModelId.llama_guard_3_8b.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-guard-3-11b-vision",
-        CoreModelId.llama_guard_3_11b_vision.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py
new file mode 100644
index 000000000..14de585d4
--- /dev/null
+++ b/llama_stack/providers/remote/inference/fireworks/models.py
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p1-8b-instruct",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p1-70b-instruct",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p1-405b-instruct",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-1b-instruct",
+        CoreModelId.llama3_2_1b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-3b-instruct",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p3-70b-instruct",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-guard-3-8b",
+        CoreModelId.llama_guard_3_8b.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-guard-3-11b-vision",
+        CoreModelId.llama_guard_3_11b_vision.value,
+    ),
+]
diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py
new file mode 100644
index 000000000..1d9b575d4
--- /dev/null
+++ b/llama_stack/providers/remote/inference/nvidia/models.py
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+_MODEL_ALIASES = [
+    build_model_alias(
+        "meta/llama3-8b-instruct",
+        CoreModelId.llama3_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama3-70b-instruct",
+        CoreModelId.llama3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.1-8b-instruct",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.1-70b-instruct",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.1-405b-instruct",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-1b-instruct",
+        CoreModelId.llama3_2_1b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-3b-instruct",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-11b-vision-instruct",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-90b-vision-instruct",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    # TODO(mf): how do we handle Nemotron models?
+ # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct", +] diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 8e67333af..0da617858 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging import warnings from typing import AsyncIterator, List, Optional, Union @@ -26,19 +25,14 @@ from llama_stack.apis.inference import ( ToolChoice, ToolConfig, ) -from llama_stack.models.llama.datatypes import ( - CoreModelId, - SamplingParams, - ToolDefinition, - ToolPromptFormat, -) +from llama_stack.models.llama.datatypes import SamplingParams, ToolDefinition, ToolPromptFormat from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, ) from llama_stack.providers.utils.inference.prompt_adapter import content_has_media from . import NVIDIAConfig +from .models import _MODEL_ALIASES from .openai_utils import ( convert_chat_completion_request, convert_completion_request, @@ -49,49 +43,6 @@ from .openai_utils import ( ) from .utils import _is_nvidia_hosted, check_health -logger = logging.getLogger(__name__) - -_MODEL_ALIASES = [ - build_model_alias( - "meta/llama3-8b-instruct", - CoreModelId.llama3_8b_instruct.value, - ), - build_model_alias( - "meta/llama3-70b-instruct", - CoreModelId.llama3_70b_instruct.value, - ), - build_model_alias( - "meta/llama-3.1-8b-instruct", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_model_alias( - "meta/llama-3.1-70b-instruct", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_model_alias( - "meta/llama-3.1-405b-instruct", - CoreModelId.llama3_1_405b_instruct.value, - ), - build_model_alias( - "meta/llama-3.2-1b-instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), - build_model_alias( - "meta/llama-3.2-3b-instruct", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_model_alias( - "meta/llama-3.2-11b-vision-instruct", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_model_alias( - "meta/llama-3.2-90b-vision-instruct", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - # TODO(mf): how do we handle Nemotron models? 
- # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct", -] - class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): def __init__(self, config: NVIDIAConfig) -> None: diff --git a/llama_stack/providers/remote/inference/sambanova/__init__.py b/llama_stack/providers/remote/inference/sambanova/__init__.py index ccf4bf1cb..3e682e69c 100644 --- a/llama_stack/providers/remote/inference/sambanova/__init__.py +++ b/llama_stack/providers/remote/inference/sambanova/__init__.py @@ -7,7 +7,6 @@ from pydantic import BaseModel from .config import SambaNovaImplConfig -from .sambanova import SambaNovaInferenceAdapter class SambaNovaProviderDataValidator(BaseModel): @@ -15,6 +14,8 @@ class SambaNovaProviderDataValidator(BaseModel): async def get_adapter_impl(config: SambaNovaImplConfig, _deps): + from .sambanova import SambaNovaInferenceAdapter + assert isinstance(config, SambaNovaImplConfig), f"Unexpected config type: {type(config)}" impl = SambaNovaInferenceAdapter(config) await impl.initialize() diff --git a/llama_stack/providers/remote/inference/sambanova/models.py b/llama_stack/providers/remote/inference/sambanova/models.py new file mode 100644 index 000000000..27a4a149e --- /dev/null +++ b/llama_stack/providers/remote/inference/sambanova/models.py @@ -0,0 +1,49 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.models.llama.datatypes import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + build_model_alias, +) + +MODEL_ALIASES = [ + build_model_alias( + "Meta-Llama-3.1-8B-Instruct", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.1-70B-Instruct", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.1-405B-Instruct", + CoreModelId.llama3_1_405b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.2-1B-Instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.2-3B-Instruct", + CoreModelId.llama3_2_3b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.3-70B-Instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), + build_model_alias( + "Llama-3.2-11B-Vision-Instruct", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), + build_model_alias( + "Llama-3.2-90B-Vision-Instruct", + CoreModelId.llama3_2_90b_vision_instruct.value, + ), + build_model_alias( + "Meta-Llama-Guard-3-8B", + CoreModelId.llama_guard_3_8b.value, + ), +] diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py index b906e0dcb..9b3562870 100644 --- a/llama_stack/providers/remote/inference/sambanova/sambanova.py +++ b/llama_stack/providers/remote/inference/sambanova/sambanova.py @@ -18,14 +18,12 @@ from llama_stack.apis.common.content_types import ( ) from llama_stack.apis.inference import * # noqa: F403 from llama_stack.models.llama.datatypes import ( - CoreModelId, GreedySamplingStrategy, TopKSamplingStrategy, TopPSamplingStrategy, ) from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( process_chat_completion_stream_response, @@ -35,45 +33,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( ) from .config import SambaNovaImplConfig - -MODEL_ALIASES = [ - 
build_model_alias( - "Meta-Llama-3.1-8B-Instruct", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.1-70B-Instruct", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.1-405B-Instruct", - CoreModelId.llama3_1_405b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.2-1B-Instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.2-3B-Instruct", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.3-70B-Instruct", - CoreModelId.llama3_3_70b_instruct.value, - ), - build_model_alias( - "Llama-3.2-11B-Vision-Instruct", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_model_alias( - "Llama-3.2-90B-Vision-Instruct", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - build_model_alias( - "Meta-Llama-Guard-3-8B", - CoreModelId.llama_guard_3_8b.value, - ), -] +from .models import MODEL_ALIASES class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference): diff --git a/llama_stack/providers/remote/inference/tgi/__init__.py b/llama_stack/providers/remote/inference/tgi/__init__.py index 451650323..834e51324 100644 --- a/llama_stack/providers/remote/inference/tgi/__init__.py +++ b/llama_stack/providers/remote/inference/tgi/__init__.py @@ -7,13 +7,14 @@ from typing import Union from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig -from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter async def get_adapter_impl( config: Union[InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig], _deps, ): + from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter + if isinstance(config, TGIImplConfig): impl = TGIAdapter() elif isinstance(config, InferenceAPIImplConfig): diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py new file mode 100644 index 000000000..87d282ea5 --- /dev/null +++ b/llama_stack/providers/remote/inference/together/models.py @@ -0,0 +1,49 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.2-3B-Instruct-Turbo",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Meta-Llama-Guard-3-8B",
+        CoreModelId.llama_guard_3_8b.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
+        CoreModelId.llama_guard_3_11b_vision.value,
+    ),
+]
diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py
index 054501da8..7a37ff616 100644
--- a/llama_stack/providers/remote/inference/together/together.py
+++ b/llama_stack/providers/remote/inference/together/together.py
@@ -28,10 +28,8 @@ from llama_stack.apis.inference import (
     ToolPromptFormat,
 )
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
-from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     convert_message_to_openai_dict,
@@ -50,45 +48,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import TogetherImplConfig
-
-MODEL_ALIASES = [
-    build_model_alias(
-        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.2-3B-Instruct-Turbo",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.3-70B-Instruct-Turbo",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Meta-Llama-Guard-3-8B",
-        CoreModelId.llama_guard_3_8b.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
-        CoreModelId.llama_guard_3_11b_vision.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py b/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
index 2ddf7b4fe..fb1f558e5 100644
--- a/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
+++ b/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
@@ -7,7 +7,6 @@
 from pydantic import BaseModel
 
 from .config import ModelContextProtocolConfig
-from .model_context_protocol import ModelContextProtocolToolRuntimeImpl
 
 
 class ModelContextProtocolToolProviderDataValidator(BaseModel):
@@ -15,6 +14,8 @@ class ModelContextProtocolToolProviderDataValidator(BaseModel):
 
 
 async def get_adapter_impl(config: ModelContextProtocolConfig, _deps):
+    from .model_context_protocol import ModelContextProtocolToolRuntimeImpl
+
     impl = ModelContextProtocolToolRuntimeImpl(config)
     await impl.initialize()
     return impl
diff --git a/llama_stack/scripts/distro_codegen.py b/llama_stack/scripts/distro_codegen.py
index 825a039ef..1c44b4625 100644
--- a/llama_stack/scripts/distro_codegen.py
+++ b/llama_stack/scripts/distro_codegen.py
@@ -23,6 +23,22 @@ from llama_stack.distribution.build import (
 REPO_ROOT = Path(__file__).parent.parent.parent
 
 
+class ChangedPathTracker:
+    """Track a list of paths we may have changed."""
+
+    def __init__(self):
+        self._changed_paths = []
+
+    def add_paths(self, *paths):
+        for path in paths:
+            path = str(path)
+            if path not in self._changed_paths:
+                self._changed_paths.append(path)
+
+    def changed_paths(self):
+        return self._changed_paths
+
+
 def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
     """Find immediate subdirectories in the templates folder."""
     if not templates_dir.exists():
@@ -31,7 +47,7 @@ def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
     return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
 
 
-def process_template(template_dir: Path, progress) -> None:
+def process_template(template_dir: Path, progress, change_tracker: ChangedPathTracker) -> None:
     """Process a single template directory."""
     progress.print(f"Processing {template_dir.name}")
 
@@ -44,9 +60,12 @@
         if template_func := getattr(module, "get_distribution_template", None):
             template = template_func()
 
+            yaml_output_dir = REPO_ROOT / "llama_stack" / "templates" / template.name
+            doc_output_dir = REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro"
+            change_tracker.add_paths(yaml_output_dir, doc_output_dir)
             template.save_distribution(
-                yaml_output_dir=REPO_ROOT / "llama_stack" / "templates" / template.name,
-                doc_output_dir=REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro",
+                yaml_output_dir=yaml_output_dir,
+                doc_output_dir=doc_output_dir,
             )
         else:
             progress.print(f"[yellow]Warning: {template_dir.name} has no get_distribution_template function")
@@ -56,14 +75,19 @@
         raise e
 
 
-def check_for_changes() -> bool:
+def check_for_changes(change_tracker: ChangedPathTracker) -> bool:
     """Check if there are any uncommitted changes."""
-    result = subprocess.run(
-        ["git", "diff", "--exit-code"],
-        cwd=REPO_ROOT,
-        capture_output=True,
-    )
-    return result.returncode != 0
+    has_changes = False
+    for path in change_tracker.changed_paths():
+        result = subprocess.run(
+            ["git", "diff", "--exit-code", path],
+            cwd=REPO_ROOT,
+            capture_output=True,
+        )
+        if result.returncode != 0:
+            print(f"Change detected in '{path}'.", file=sys.stderr)
+            has_changes = True
+    return has_changes
 
 
 def collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]:
@@ -83,7 +107,7 @@ def collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]:
     return None, []
 
 
-def generate_dependencies_file():
+def generate_dependencies_file(change_tracker: ChangedPathTracker):
     templates_dir = REPO_ROOT / "llama_stack" / "templates"
     distribution_deps = {}
 
@@ -93,12 +117,14 @@
             distribution_deps[name] = deps
 
     deps_file = REPO_ROOT / "distributions" / "dependencies.json"
+    change_tracker.add_paths(deps_file)
     with open(deps_file, "w") as f:
         f.write(json.dumps(distribution_deps, indent=2) + "\n")
 
 
 def main():
     templates_dir = REPO_ROOT / "llama_stack" / "templates"
+    change_tracker = ChangedPathTracker()
 
     with Progress(
         SpinnerColumn(),
@@ -108,7 +134,7 @@
         task = progress.add_task("Processing distribution templates...", total=len(template_dirs))
 
         # Create a partial function with the progress bar
-        process_func = partial(process_template, progress=progress)
+        process_func = partial(process_template, progress=progress, change_tracker=change_tracker)
 
         # Process templates in parallel
         with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -116,9 +142,9 @@
             list(executor.map(process_func, template_dirs))
             progress.update(task, advance=len(template_dirs))
 
-    generate_dependencies_file()
+    generate_dependencies_file(change_tracker)
 
-    if check_for_changes():
+    if check_for_changes(change_tracker):
         print(
             "Distribution template changes detected. Please commit the changes.",
             file=sys.stderr,
diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py
index 0b294824d..550269f61 100644
--- a/llama_stack/templates/bedrock/bedrock.py
+++ b/llama_stack/templates/bedrock/bedrock.py
@@ -10,7 +10,7 @@ from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
-from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES
+from llama_stack.providers.remote.inference.bedrock.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py
index 4f6d0c8f3..5f3921102 100644
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@@ -14,7 +14,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
-from llama_stack.providers.remote.inference.cerebras.cerebras import model_aliases
+from llama_stack.providers.remote.inference.cerebras.models import model_aliases
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py
index a6809fef6..8d91c223d 100644
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
-from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
+from llama_stack.providers.remote.inference.fireworks.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py
index ee22b5555..6bca48e99 100644
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -9,7 +9,7 @@ from pathlib import Path
 from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
-from llama_stack.providers.remote.inference.nvidia.nvidia import _MODEL_ALIASES
+from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
index 48960c5ba..0fee6808c 100644
--- a/llama_stack/templates/ollama/build.yaml
+++ b/llama_stack/templates/ollama/build.yaml
@@ -6,7 +6,6 @@ distribution_spec:
     - remote::ollama
     vector_io:
     - inline::faiss
-    - inline::sqlite_vec
    - remote::chromadb
     - remote::pgvector
     safety:
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index 9d5bfc7a0..4ce64cf59 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -20,6 +20,13 @@ providers:
     provider_type: inline::sentence-transformers
     config: {}
   vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
   - provider_id: faiss
     provider_type: inline::faiss
     config:
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 9ac1f3267..b4982f8e2 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -34,7 +34,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py
index c7a9428af..dac7346a7 100644
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@@ -14,7 +14,7 @@
 )
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
-from llama_stack.providers.remote.inference.sambanova.sambanova import MODEL_ALIASES
+from llama_stack.providers.remote.inference.sambanova.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py
index f7b18e32a..ef6847fb2 100644
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
-from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
+from llama_stack.providers.remote.inference.together.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
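
A note on the recurring pattern in this patch: each provider's alias table now lives in a dependency-light models.py, so the distribution templates import MODEL_ALIASES without importing the adapter module (and its heavy SDK dependencies). Usage is unchanged; a minimal sketch follows, assuming the alias objects built by build_model_alias expose provider_model_id and llama_model fields (an assumption about model_registry.py, not confirmed by this diff):

    from llama_stack.providers.remote.inference.fireworks.models import MODEL_ALIASES

    # Each alias maps a provider-specific model id to a core Llama model
    # descriptor; field names here are assumed, not taken from this patch.
    for alias in MODEL_ALIASES:
        print(f"{alias.provider_model_id} -> {alias.llama_model}")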
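The __init__.py changes for the rag, sambanova, tgi, and model_context_protocol providers all apply the same deferred-import idea: the implementation class is imported inside get_adapter_impl / get_provider_impl rather than at module top level, so importing the package for registration or codegen stays cheap. A minimal sketch of the pattern, with hypothetical heavy_adapter / HeavyAdapter names standing in for the real modules:

    async def get_adapter_impl(config, _deps):
        # Deferred import: the adapter's third-party dependencies are only
        # pulled in when a stack actually instantiates this provider.
        from .heavy_adapter import HeavyAdapter  # hypothetical module/class

        impl = HeavyAdapter(config)
        await impl.initialize()
        return impl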
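Finally, the distro_codegen.py change narrows the post-generation dirty-tree check: instead of running git diff --exit-code across the whole repository, the script diffs only the paths it actually wrote, so unrelated local changes no longer fail the pre-commit hook. A sketch of that flow, reusing the ChangedPathTracker added above (assumes it runs from the repo root with git on PATH; the dependencies.json path is just an example):

    import subprocess

    from llama_stack.scripts.distro_codegen import ChangedPathTracker

    tracker = ChangedPathTracker()
    tracker.add_paths("distributions/dependencies.json")

    for path in tracker.changed_paths():
        # `git diff --exit-code <path>` exits non-zero iff the path has
        # uncommitted changes; output is captured rather than printed.
        result = subprocess.run(["git", "diff", "--exit-code", path], capture_output=True)
        if result.returncode != 0:
            print(f"Change detected in '{path}'.")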