fix: Get distro_codegen.py working with default deps

Before this change, `distro_codegen.py` would only work if the user
manually installed multiple provider-specific dependencies. Now, users
can run `distro_codegen.py` without any provider-specific dependencies
because we avoid importing entire provider implementations just to
get the config needed to build each provider's template.

Concretely, this mostly means moving the
MODEL_ALIASES/model_aliases/_MODEL_ALIASES definitions into a new
models.py module within the provider implementation for those providers
that require additional dependencies. It also meant moving a couple of
imports from the top level of the module to inside `get_adapter_impl`
for some providers, which follows the pattern used by multiple existing
providers.
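
The pattern is the same across the affected providers; here is a minimal
sketch of it using a hypothetical `example` provider (the provider and
adapter names below are illustrative, not taken from this diff):

```python
# llama_stack/providers/remote/inference/example/models.py (hypothetical provider)
# Only lightweight imports live here, so distro_codegen.py can load the model
# aliases without pulling in the provider's SDK.
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import build_model_alias

MODEL_ALIASES = [
    build_model_alias(
        "example/llama-3.1-8b-instruct",
        CoreModelId.llama3_1_8b_instruct.value,
    ),
]


# llama_stack/providers/remote/inference/example/__init__.py (hypothetical provider)
async def get_adapter_impl(config, _deps):
    # The heavy import is deferred until the adapter is actually constructed,
    # so importing the package itself stays dependency-free.
    from .example import ExampleInferenceAdapter

    impl = ExampleInferenceAdapter(config)
    await impl.initialize()
    return impl
```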

To ensure we don't regress and accidentally add new imports that cause
distro_codegen.py to fail, the stubbed-in pre-commit hook for
distro_codegen.py was uncommented and slightly tweaked: it now runs via
`uv run python ...` so that it executes with only the project's default
dependencies, and it runs automatically instead of manually.
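
In effect, the hook entry boils down to invoking the codegen module
through `uv`; a rough local equivalent (illustrative only, the real hook
is configured in `.pre-commit-config.yaml` below):

```python
# Rough local equivalent of the hook's entry point. Running through `uv run`
# means the script only sees the project's default dependency set.
import subprocess

subprocess.run(
    ["uv", "run", "python", "-m", "llama_stack.scripts.distro_codegen"],
    check=True,
)
```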

Lastly, this updates distro_codegen.py itself to keep track of the paths
it might have changed and to only `git diff` those paths when checking
for changed files, instead of diffing the entire working tree. The
latter was overly broad and required a user to have no other unstaged
changes in their working tree, even if those unstaged changes were
unrelated to generated code. Now it only flags uncommitted changes for
paths distro_codegen.py actually writes to.
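
The idea, in brief (the full implementation is in the
`distro_codegen.py` diff below): diff only the paths the script touched
and report each one that differs.

```python
# Sketch of the per-path change check: only paths the script wrote to are
# diffed, so unrelated unstaged changes elsewhere in the tree no longer
# cause a failure.
import subprocess
import sys


def check_for_changes(changed_paths: list[str], repo_root: str) -> bool:
    has_changes = False
    for path in changed_paths:
        result = subprocess.run(
            ["git", "diff", "--exit-code", path],
            cwd=repo_root,
            capture_output=True,
        )
        if result.returncode != 0:
            print(f"Change detected in '{path}'.", file=sys.stderr)
            has_changes = True
    return has_changes
```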

Our generated code was also out of date, presumably because of these
issues, so this commit also includes updates to the generated code
purely to bring it back in sync; the pre-commit hook now enforces that
it stays up to date.

(Closes #1122)

Signed-off-by: Ben Browning <bbrownin@redhat.com>
Author: Ben Browning <bbrownin@redhat.com>
Date:   2025-02-15 13:18:36 -05:00
Parent: 743f434860
Commit: c7fffa2796
28 changed files with 334 additions and 240 deletions

View file

@@ -75,19 +75,19 @@ repos:
# - id: markdown-link-check
# args: ['--quiet']
# - repo: local
# hooks:
# - id: distro-codegen
# name: Distribution Template Codegen
# additional_dependencies:
# - rich
# - pydantic
# entry: python -m llama_stack.scripts.distro_codegen
# language: python
# pass_filenames: false
# require_serial: true
# files: ^llama_stack/templates/.*$
# stages: [manual]
- repo: local
hooks:
- id: distro-codegen
name: Distribution Template Codegen
additional_dependencies:
- rich
- pydantic
- uv==0.6.0
entry: uv run python -m llama_stack.scripts.distro_codegen
language: python
pass_filenames: false
require_serial: true
files: ^llama_stack/templates/.*$
ci:
autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks

View file

@@ -61,7 +61,8 @@ docker run \
--port $LLAMA_STACK_PORT \
--env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
--env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
--env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN
--env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
--env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION
```
### Via Conda
@@ -72,5 +73,6 @@ llama stack run ./run.yaml \
--port $LLAMA_STACK_PORT \
--env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
--env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
--env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN
--env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
--env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION
```

View file

@@ -9,10 +9,11 @@ from typing import Any, Dict
from llama_stack.providers.datatypes import Api
from .config import RagToolRuntimeConfig
from .memory import MemoryToolRuntimeImpl
async def get_provider_impl(config: RagToolRuntimeConfig, deps: Dict[str, Any]):
from .memory import MemoryToolRuntimeImpl
impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference])
await impl.initialize()
return impl

View file

@@ -27,12 +27,10 @@ from llama_stack.apis.inference import (
ToolDefinition,
ToolPromptFormat,
)
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
from llama_stack.providers.utils.bedrock.client import create_bedrock_client
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_model_alias,
)
from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionChoice,
@@ -47,20 +45,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str,
)
MODEL_ALIASES = [
build_model_alias(
"meta.llama3-1-8b-instruct-v1:0",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
"meta.llama3-1-70b-instruct-v1:0",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
"meta.llama3-1-405b-instruct-v1:0",
CoreModelId.llama3_1_405b_instruct.value,
),
]
from .models import MODEL_ALIASES
class BedrockInferenceAdapter(ModelRegistryHelper, Inference):

View file

@@ -0,0 +1,25 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_model_alias,
)
MODEL_ALIASES = [
build_model_alias(
"meta.llama3-1-8b-instruct-v1:0",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
"meta.llama3-1-70b-instruct-v1:0",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
"meta.llama3-1-405b-instruct-v1:0",
CoreModelId.llama3_1_405b_instruct.value,
),
]

View file

@@ -26,10 +26,9 @@ from llama_stack.apis.inference import (
ToolDefinition,
ToolPromptFormat,
)
from llama_stack.models.llama.datatypes import CoreModelId, TopKSamplingStrategy
from llama_stack.models.llama.datatypes import TopKSamplingStrategy
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_model_alias,
)
from llama_stack.providers.utils.inference.openai_compat import (
get_sampling_options,
@@ -44,17 +43,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
)
from .config import CerebrasImplConfig
model_aliases = [
build_model_alias(
"llama3.1-8b",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
"llama-3.3-70b",
CoreModelId.llama3_3_70b_instruct.value,
),
]
from .models import model_aliases
class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):

View file

@@ -0,0 +1,21 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_model_alias,
)
model_aliases = [
build_model_alias(
"llama3.1-8b",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
"llama-3.3-70b",
CoreModelId.llama3_3_70b_instruct.value,
),
]

View file

@@ -29,10 +29,8 @@ from llama_stack.apis.inference import (
ToolPromptFormat,
)
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_model_alias,
)
from llama_stack.providers.utils.inference.openai_compat import (
convert_message_to_openai_dict,
@@ -51,49 +49,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
)
from .config import FireworksImplConfig
MODEL_ALIASES = [
build_model_alias(
"accounts/fireworks/models/llama-v3p1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p2-1b-instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p3-70b-instruct",
CoreModelId.llama3_3_70b_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-guard-3-8b",
CoreModelId.llama_guard_3_8b.value,
),
build_model_alias(
"accounts/fireworks/models/llama-guard-3-11b-vision",
CoreModelId.llama_guard_3_11b_vision.value,
),
]
from .models import MODEL_ALIASES
class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):

View file

@@ -0,0 +1,53 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_model_alias,
)
MODEL_ALIASES = [
build_model_alias(
"accounts/fireworks/models/llama-v3p1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p2-1b-instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-v3p3-70b-instruct",
CoreModelId.llama3_3_70b_instruct.value,
),
build_model_alias(
"accounts/fireworks/models/llama-guard-3-8b",
CoreModelId.llama_guard_3_8b.value,
),
build_model_alias(
"accounts/fireworks/models/llama-guard-3-11b-vision",
CoreModelId.llama_guard_3_11b_vision.value,
),
]

View file

@@ -0,0 +1,51 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_model_alias,
)
_MODEL_ALIASES = [
build_model_alias(
"meta/llama3-8b-instruct",
CoreModelId.llama3_8b_instruct.value,
),
build_model_alias(
"meta/llama3-70b-instruct",
CoreModelId.llama3_70b_instruct.value,
),
build_model_alias(
"meta/llama-3.1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
"meta/llama-3.1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
"meta/llama-3.1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
"meta/llama-3.2-1b-instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
build_model_alias(
"meta/llama-3.2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
"meta/llama-3.2-11b-vision-instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
"meta/llama-3.2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
# TODO(mf): how do we handle Nemotron models?
# "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct",
]

View file

@@ -25,14 +25,14 @@ from llama_stack.apis.inference import (
ToolChoice,
ToolConfig,
)
from llama_stack.models.llama.datatypes import CoreModelId, SamplingParams, ToolDefinition, ToolPromptFormat
from llama_stack.models.llama.datatypes import SamplingParams, ToolDefinition, ToolPromptFormat
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_model_alias,
)
from llama_stack.providers.utils.inference.prompt_adapter import content_has_media
from . import NVIDIAConfig
from .models import _MODEL_ALIASES
from .openai_utils import (
convert_chat_completion_request,
convert_completion_request,
@@ -43,47 +43,6 @@ from .openai_utils import (
)
from .utils import _is_nvidia_hosted, check_health
_MODEL_ALIASES = [
build_model_alias(
"meta/llama3-8b-instruct",
CoreModelId.llama3_8b_instruct.value,
),
build_model_alias(
"meta/llama3-70b-instruct",
CoreModelId.llama3_70b_instruct.value,
),
build_model_alias(
"meta/llama-3.1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
"meta/llama-3.1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
"meta/llama-3.1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
"meta/llama-3.2-1b-instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
build_model_alias(
"meta/llama-3.2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
"meta/llama-3.2-11b-vision-instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
"meta/llama-3.2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
# TODO(mf): how do we handle Nemotron models?
# "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct",
]
class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
def __init__(self, config: NVIDIAConfig) -> None:

View file

@@ -7,7 +7,6 @@
from pydantic import BaseModel
from .config import SambaNovaImplConfig
from .sambanova import SambaNovaInferenceAdapter
class SambaNovaProviderDataValidator(BaseModel):
@@ -15,6 +14,8 @@ class SambaNovaProviderDataValidator(BaseModel):
async def get_adapter_impl(config: SambaNovaImplConfig, _deps):
from .sambanova import SambaNovaInferenceAdapter
assert isinstance(config, SambaNovaImplConfig), f"Unexpected config type: {type(config)}"
impl = SambaNovaInferenceAdapter(config)
await impl.initialize()

View file

@@ -0,0 +1,49 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_model_alias,
)
MODEL_ALIASES = [
build_model_alias(
"Meta-Llama-3.1-8B-Instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
"Meta-Llama-3.1-70B-Instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
"Meta-Llama-3.1-405B-Instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
"Meta-Llama-3.2-1B-Instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
build_model_alias(
"Meta-Llama-3.2-3B-Instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
"Meta-Llama-3.3-70B-Instruct",
CoreModelId.llama3_3_70b_instruct.value,
),
build_model_alias(
"Llama-3.2-11B-Vision-Instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
"Llama-3.2-90B-Vision-Instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_model_alias(
"Meta-Llama-Guard-3-8B",
CoreModelId.llama_guard_3_8b.value,
),
]

View file

@@ -18,14 +18,12 @@ from llama_stack.apis.common.content_types import (
)
from llama_stack.apis.inference import * # noqa: F403
from llama_stack.models.llama.datatypes import (
CoreModelId,
GreedySamplingStrategy,
TopKSamplingStrategy,
TopPSamplingStrategy,
)
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_model_alias,
)
from llama_stack.providers.utils.inference.openai_compat import (
process_chat_completion_stream_response,
@@ -35,45 +33,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
)
from .config import SambaNovaImplConfig
MODEL_ALIASES = [
build_model_alias(
"Meta-Llama-3.1-8B-Instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
"Meta-Llama-3.1-70B-Instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
"Meta-Llama-3.1-405B-Instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
"Meta-Llama-3.2-1B-Instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
build_model_alias(
"Meta-Llama-3.2-3B-Instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
"Meta-Llama-3.3-70B-Instruct",
CoreModelId.llama3_3_70b_instruct.value,
),
build_model_alias(
"Llama-3.2-11B-Vision-Instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
"Llama-3.2-90B-Vision-Instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_model_alias(
"Meta-Llama-Guard-3-8B",
CoreModelId.llama_guard_3_8b.value,
),
]
from .models import MODEL_ALIASES
class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):

View file

@@ -7,13 +7,14 @@
from typing import Union
from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter
async def get_adapter_impl(
config: Union[InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig],
_deps,
):
from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter
if isinstance(config, TGIImplConfig):
impl = TGIAdapter()
elif isinstance(config, InferenceAPIImplConfig):

View file

@@ -0,0 +1,49 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_model_alias,
)
MODEL_ALIASES = [
build_model_alias(
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
"meta-llama/Llama-3.2-3B-Instruct-Turbo",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_model_alias(
"meta-llama/Llama-3.3-70B-Instruct-Turbo",
CoreModelId.llama3_3_70b_instruct.value,
),
build_model_alias(
"meta-llama/Meta-Llama-Guard-3-8B",
CoreModelId.llama_guard_3_8b.value,
),
build_model_alias(
"meta-llama/Llama-Guard-3-11B-Vision-Turbo",
CoreModelId.llama_guard_3_11b_vision.value,
),
]

View file

@@ -28,10 +28,8 @@ from llama_stack.apis.inference import (
ToolPromptFormat,
)
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_model_alias,
)
from llama_stack.providers.utils.inference.openai_compat import (
convert_message_to_openai_dict,
@@ -50,45 +48,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
)
from .config import TogetherImplConfig
MODEL_ALIASES = [
build_model_alias(
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
"meta-llama/Llama-3.2-3B-Instruct-Turbo",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_model_alias(
"meta-llama/Llama-3.3-70B-Instruct-Turbo",
CoreModelId.llama3_3_70b_instruct.value,
),
build_model_alias(
"meta-llama/Meta-Llama-Guard-3-8B",
CoreModelId.llama_guard_3_8b.value,
),
build_model_alias(
"meta-llama/Llama-Guard-3-11B-Vision-Turbo",
CoreModelId.llama_guard_3_11b_vision.value,
),
]
from .models import MODEL_ALIASES
class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):

View file

@@ -7,7 +7,6 @@
from pydantic import BaseModel
from .config import ModelContextProtocolConfig
from .model_context_protocol import ModelContextProtocolToolRuntimeImpl
class ModelContextProtocolToolProviderDataValidator(BaseModel):
@@ -15,6 +14,8 @@ class ModelContextProtocolToolProviderDataValidator(BaseModel):
async def get_adapter_impl(config: ModelContextProtocolConfig, _deps):
from .model_context_protocol import ModelContextProtocolToolRuntimeImpl
impl = ModelContextProtocolToolRuntimeImpl(config)
await impl.initialize()
return impl

View file

@@ -23,6 +23,22 @@ from llama_stack.distribution.build import (
REPO_ROOT = Path(__file__).parent.parent.parent
class ChangedPathTracker:
"""Track a list of paths we may have changed."""
def __init__(self):
self._changed_paths = []
def add_paths(self, *paths):
for path in paths:
path = str(path)
if path not in self._changed_paths:
self._changed_paths.append(path)
def changed_paths(self):
return self._changed_paths
def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
"""Find immediate subdirectories in the templates folder."""
if not templates_dir.exists():
@@ -31,7 +47,7 @@ def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
def process_template(template_dir: Path, progress) -> None:
def process_template(template_dir: Path, progress, change_tracker: ChangedPathTracker) -> None:
"""Process a single template directory."""
progress.print(f"Processing {template_dir.name}")
@@ -44,9 +60,12 @@ def process_template(template_dir: Path, progress) -> None:
if template_func := getattr(module, "get_distribution_template", None):
template = template_func()
yaml_output_dir = REPO_ROOT / "llama_stack" / "templates" / template.name
doc_output_dir = REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro"
change_tracker.add_paths(yaml_output_dir, doc_output_dir)
template.save_distribution(
yaml_output_dir=REPO_ROOT / "llama_stack" / "templates" / template.name,
doc_output_dir=REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro",
yaml_output_dir=yaml_output_dir,
doc_output_dir=doc_output_dir,
)
else:
progress.print(f"[yellow]Warning: {template_dir.name} has no get_distribution_template function")
@@ -56,14 +75,19 @@ def process_template(template_dir: Path, progress) -> None:
raise e
def check_for_changes() -> bool:
def check_for_changes(change_tracker: ChangedPathTracker) -> bool:
"""Check if there are any uncommitted changes."""
has_changes = False
for path in change_tracker.changed_paths():
result = subprocess.run(
["git", "diff", "--exit-code"],
["git", "diff", "--exit-code", path],
cwd=REPO_ROOT,
capture_output=True,
)
return result.returncode != 0
if result.returncode != 0:
print(f"Change detected in '{path}'.", file=sys.stderr)
has_changes = True
return has_changes
def collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]:
@@ -83,7 +107,7 @@ def collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]:
return None, []
def generate_dependencies_file():
def generate_dependencies_file(change_tracker: ChangedPathTracker):
templates_dir = REPO_ROOT / "llama_stack" / "templates"
distribution_deps = {}
@@ -93,12 +117,14 @@ def generate_dependencies_file():
distribution_deps[name] = deps
deps_file = REPO_ROOT / "distributions" / "dependencies.json"
change_tracker.add_paths(deps_file)
with open(deps_file, "w") as f:
f.write(json.dumps(distribution_deps, indent=2) + "\n")
def main():
templates_dir = REPO_ROOT / "llama_stack" / "templates"
change_tracker = ChangedPathTracker()
with Progress(
SpinnerColumn(),
@@ -108,7 +134,7 @@ def main():
task = progress.add_task("Processing distribution templates...", total=len(template_dirs))
# Create a partial function with the progress bar
process_func = partial(process_template, progress=progress)
process_func = partial(process_template, progress=progress, change_tracker=change_tracker)
# Process templates in parallel
with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -116,9 +142,9 @@ def main():
list(executor.map(process_func, template_dirs))
progress.update(task, advance=len(template_dirs))
generate_dependencies_file()
generate_dependencies_file(change_tracker)
if check_for_changes():
if check_for_changes(change_tracker):
print(
"Distribution template changes detected. Please commit the changes.",
file=sys.stderr,

View file

@@ -10,7 +10,7 @@ from llama_stack.apis.models import ModelInput
from llama_stack.distribution.datatypes import Provider, ToolGroupInput
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES
from llama_stack.providers.remote.inference.bedrock.models import MODEL_ALIASES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings

View file

@@ -14,7 +14,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
)
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
from llama_stack.providers.remote.inference.cerebras.cerebras import model_aliases
from llama_stack.providers.remote.inference.cerebras.models import model_aliases
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings

View file

@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
)
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
from llama_stack.providers.remote.inference.fireworks.models import MODEL_ALIASES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings

View file

@@ -9,7 +9,7 @@ from pathlib import Path
from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
from llama_stack.providers.remote.inference.nvidia.nvidia import _MODEL_ALIASES
from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ALIASES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings

View file

@@ -6,7 +6,6 @@ distribution_spec:
- remote::ollama
vector_io:
- inline::faiss
- inline::sqlite_vec
- remote::chromadb
- remote::pgvector
safety:

View file

@@ -20,6 +20,13 @@ providers:
provider_type: inline::sentence-transformers
config: {}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
- provider_id: faiss
provider_type: inline::faiss
config:

View file

@@ -34,7 +34,6 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard

View file

@@ -14,7 +14,7 @@ from llama_stack.distribution.datatypes import (
)
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
from llama_stack.providers.remote.inference.sambanova.sambanova import MODEL_ALIASES
from llama_stack.providers.remote.inference.sambanova.models import MODEL_ALIASES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings

View file

@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
)
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.together import TogetherImplConfig
from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
from llama_stack.providers.remote.inference.together.models import MODEL_ALIASES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings