Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-08-10 04:08:31 +00:00
fix: Get distro_codegen.py working with default deps
Before this change, `distro_codegen.py` would only work if the user manually installed multiple provider-specific dependencies. Now, users can run `distro_codegen.py` without any provider-specific dependencies, because we avoid importing the entire provider implementations just to get the config needed to build the provider template.

Concretely, this mostly means moving the MODEL_ALIASES/model_aliases/_MODEL_ALIASES definitions into a new models.py module within the provider implementation, for those providers that require additional dependencies. It also meant moving a couple of imports from the top level to inside `get_adapter_impl` for some providers, following the pattern already used by multiple existing providers.

To ensure we don't regress and accidentally add new imports that cause distro_codegen.py to fail, the stubbed-in pre-commit hook for distro_codegen.py was uncommented and slightly tweaked: it now runs via `uv run python ...` so that it uses only the project's default dependencies, and it runs automatically instead of manually.

Lastly, this updates distro_codegen.py itself to keep track of the paths it may have changed and to `git diff` only those paths when checking for changed files, instead of diffing the entire working tree. The latter was overly broad and required the user to have no other unstaged changes in their working tree, even when those changes were unrelated to generated code. Now it only flags uncommitted changes on paths distro_codegen.py actually writes to.

Our generated code was also out of date, presumably because of these issues, so this commit includes some updates to the generated code purely to bring it back in sync; the pre-commit hook now enforces that it stays up to date.

(Closes #1122)

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
parent: 743f434860
commit: c7fffa2796

28 changed files with 334 additions and 240 deletions
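The recurring pattern in the provider changes below is deferring provider-specific imports until an adapter is actually constructed. A minimal sketch of that pattern (module and class names here are hypothetical, for illustration only; the real diffs follow):

from typing import Any, Dict

from .config import ExampleImplConfig  # config modules stay lightweight (pydantic only)


async def get_adapter_impl(config: ExampleImplConfig, _deps: Dict[str, Any]):
    # The adapter module (and any heavy provider SDK it pulls in) is imported
    # only when an adapter instance is needed, so tooling like distro_codegen.py
    # can import this package for its config without the SDK installed.
    from .example import ExampleInferenceAdapter

    impl = ExampleInferenceAdapter(config)
    await impl.initialize()
    return impl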
.pre-commit-config.yaml
@@ -75,19 +75,19 @@ repos:
 #     - id: markdown-link-check
 #       args: ['--quiet']
 
-# - repo: local
-#   hooks:
-#     - id: distro-codegen
-#       name: Distribution Template Codegen
-#       additional_dependencies:
-#         - rich
-#         - pydantic
-#       entry: python -m llama_stack.scripts.distro_codegen
-#       language: python
-#       pass_filenames: false
-#       require_serial: true
-#       files: ^llama_stack/templates/.*$
-#       stages: [manual]
+- repo: local
+  hooks:
+    - id: distro-codegen
+      name: Distribution Template Codegen
+      additional_dependencies:
+        - rich
+        - pydantic
+        - uv==0.6.0
+      entry: uv run python -m llama_stack.scripts.distro_codegen
+      language: python
+      pass_filenames: false
+      require_serial: true
+      files: ^llama_stack/templates/.*$
 
 ci:
     autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
Bedrock distribution docs
@@ -61,7 +61,8 @@ docker run \
   --port $LLAMA_STACK_PORT \
   --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
   --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN
+  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
+  --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION
 ```
 
 ### Via Conda
@@ -72,5 +73,6 @@ llama stack run ./run.yaml \
   --port $LLAMA_STACK_PORT \
   --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
   --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN
+  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
+  --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION
 ```
RAG tool runtime provider __init__.py
@@ -9,10 +9,11 @@ from typing import Any, Dict
 from llama_stack.providers.datatypes import Api
 
 from .config import RagToolRuntimeConfig
-from .memory import MemoryToolRuntimeImpl
 
 
 async def get_provider_impl(config: RagToolRuntimeConfig, deps: Dict[str, Any]):
+    from .memory import MemoryToolRuntimeImpl
+
     impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference])
     await impl.initialize()
     return impl
llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -27,12 +27,10 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
 from llama_stack.providers.utils.bedrock.client import create_bedrock_client
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
@@ -47,20 +45,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     interleaved_content_as_str,
 )
 
-MODEL_ALIASES = [
-    build_model_alias(
-        "meta.llama3-1-8b-instruct-v1:0",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "meta.llama3-1-70b-instruct-v1:0",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta.llama3-1-405b-instruct-v1:0",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class BedrockInferenceAdapter(ModelRegistryHelper, Inference):
llama_stack/providers/remote/inference/bedrock/models.py (new file, +25)
@@ -0,0 +1,25 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "meta.llama3-1-8b-instruct-v1:0",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta.llama3-1-70b-instruct-v1:0",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta.llama3-1-405b-instruct-v1:0",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+]
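The new models.py modules exist so the alias tables can be imported without pulling in the provider SDK (boto3, in Bedrock's case). Conceptually, each table maps a provider-specific model ID to the canonical Llama model descriptor. A rough self-contained sketch of that lookup (an illustration only, not llama-stack's actual ModelRegistryHelper API; the descriptor strings are assumptions):

# Hypothetical stand-in for the alias resolution ModelRegistryHelper performs.
BEDROCK_TO_CANONICAL = {
    "meta.llama3-1-8b-instruct-v1:0": "Llama3.1-8B-Instruct",
    "meta.llama3-1-70b-instruct-v1:0": "Llama3.1-70B-Instruct",
    "meta.llama3-1-405b-instruct-v1:0": "Llama3.1-405B-Instruct",
}


def canonical_model_id(provider_model_id: str) -> str:
    """Resolve a provider model ID to its canonical Llama descriptor."""
    if provider_model_id not in BEDROCK_TO_CANONICAL:
        raise ValueError(f"unknown Bedrock model: {provider_model_id}")
    return BEDROCK_TO_CANONICAL[provider_model_id]

Keeping this mapping in a dependency-free module is the whole point: distro_codegen.py can read it while building templates without the SDK installed.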
llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -26,10 +26,9 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.models.llama.datatypes import CoreModelId, TopKSamplingStrategy
+from llama_stack.models.llama.datatypes import TopKSamplingStrategy
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     get_sampling_options,
@@ -44,17 +43,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import CerebrasImplConfig
-
-model_aliases = [
-    build_model_alias(
-        "llama3.1-8b",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "llama-3.3-70b",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-]
+from .models import model_aliases
 
 
 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
llama_stack/providers/remote/inference/cerebras/models.py (new file, +21)
@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+model_aliases = [
+    build_model_alias(
+        "llama3.1-8b",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "llama-3.3-70b",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+]
llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -29,10 +29,8 @@ from llama_stack.apis.inference import (
     ToolPromptFormat,
 )
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
-from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     convert_message_to_openai_dict,
@@ -51,49 +49,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import FireworksImplConfig
-
-MODEL_ALIASES = [
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p1-8b-instruct",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p1-70b-instruct",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p1-405b-instruct",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-1b-instruct",
-        CoreModelId.llama3_2_1b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-3b-instruct",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p3-70b-instruct",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-guard-3-8b",
-        CoreModelId.llama_guard_3_8b.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-guard-3-11b-vision",
-        CoreModelId.llama_guard_3_11b_vision.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
llama_stack/providers/remote/inference/fireworks/models.py (new file, +53)
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p1-8b-instruct",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p1-70b-instruct",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p1-405b-instruct",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-1b-instruct",
+        CoreModelId.llama3_2_1b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-3b-instruct",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p3-70b-instruct",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-guard-3-8b",
+        CoreModelId.llama_guard_3_8b.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-guard-3-11b-vision",
+        CoreModelId.llama_guard_3_11b_vision.value,
+    ),
+]
llama_stack/providers/remote/inference/nvidia/models.py (new file, +51)
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+_MODEL_ALIASES = [
+    build_model_alias(
+        "meta/llama3-8b-instruct",
+        CoreModelId.llama3_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama3-70b-instruct",
+        CoreModelId.llama3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.1-8b-instruct",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.1-70b-instruct",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.1-405b-instruct",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-1b-instruct",
+        CoreModelId.llama3_2_1b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-3b-instruct",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-11b-vision-instruct",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-90b-vision-instruct",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    # TODO(mf): how do we handle Nemotron models?
+    # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct",
+]
llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -25,14 +25,14 @@ from llama_stack.apis.inference import (
     ToolChoice,
     ToolConfig,
 )
-from llama_stack.models.llama.datatypes import CoreModelId, SamplingParams, ToolDefinition, ToolPromptFormat
+from llama_stack.models.llama.datatypes import SamplingParams, ToolDefinition, ToolPromptFormat
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.prompt_adapter import content_has_media
 
 from . import NVIDIAConfig
+from .models import _MODEL_ALIASES
 from .openai_utils import (
     convert_chat_completion_request,
     convert_completion_request,
@@ -43,47 +43,6 @@ from .openai_utils import (
 )
 from .utils import _is_nvidia_hosted, check_health
 
-_MODEL_ALIASES = [
-    build_model_alias(
-        "meta/llama3-8b-instruct",
-        CoreModelId.llama3_8b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama3-70b-instruct",
-        CoreModelId.llama3_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.1-8b-instruct",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.1-70b-instruct",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.1-405b-instruct",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.2-1b-instruct",
-        CoreModelId.llama3_2_1b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.2-3b-instruct",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.2-11b-vision-instruct",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.2-90b-vision-instruct",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    # TODO(mf): how do we handle Nemotron models?
-    # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct",
-]
-
-
 class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: NVIDIAConfig) -> None:
llama_stack/providers/remote/inference/sambanova/__init__.py
@@ -7,7 +7,6 @@
 from pydantic import BaseModel
 
 from .config import SambaNovaImplConfig
-from .sambanova import SambaNovaInferenceAdapter
 
 
 class SambaNovaProviderDataValidator(BaseModel):
@@ -15,6 +14,8 @@ class SambaNovaProviderDataValidator(BaseModel):
 
 
 async def get_adapter_impl(config: SambaNovaImplConfig, _deps):
+    from .sambanova import SambaNovaInferenceAdapter
+
     assert isinstance(config, SambaNovaImplConfig), f"Unexpected config type: {type(config)}"
     impl = SambaNovaInferenceAdapter(config)
     await impl.initialize()
llama_stack/providers/remote/inference/sambanova/models.py (new file, +49)
@@ -0,0 +1,49 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "Meta-Llama-3.1-8B-Instruct",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "Meta-Llama-3.1-70B-Instruct",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "Meta-Llama-3.1-405B-Instruct",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "Meta-Llama-3.2-1B-Instruct",
+        CoreModelId.llama3_2_1b_instruct.value,
+    ),
+    build_model_alias(
+        "Meta-Llama-3.2-3B-Instruct",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "Meta-Llama-3.3-70B-Instruct",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "Llama-3.2-11B-Vision-Instruct",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "Llama-3.2-90B-Vision-Instruct",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "Meta-Llama-Guard-3-8B",
+        CoreModelId.llama_guard_3_8b.value,
+    ),
+]
llama_stack/providers/remote/inference/sambanova/sambanova.py
@@ -18,14 +18,12 @@ from llama_stack.apis.common.content_types import (
 )
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.models.llama.datatypes import (
-    CoreModelId,
     GreedySamplingStrategy,
     TopKSamplingStrategy,
     TopPSamplingStrategy,
 )
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     process_chat_completion_stream_response,
@@ -35,45 +33,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import SambaNovaImplConfig
-
-MODEL_ALIASES = [
-    build_model_alias(
-        "Meta-Llama-3.1-8B-Instruct",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "Meta-Llama-3.1-70B-Instruct",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "Meta-Llama-3.1-405B-Instruct",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_model_alias(
-        "Meta-Llama-3.2-1B-Instruct",
-        CoreModelId.llama3_2_1b_instruct.value,
-    ),
-    build_model_alias(
-        "Meta-Llama-3.2-3B-Instruct",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_model_alias(
-        "Meta-Llama-3.3-70B-Instruct",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-    build_model_alias(
-        "Llama-3.2-11B-Vision-Instruct",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "Llama-3.2-90B-Vision-Instruct",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "Meta-Llama-Guard-3-8B",
-        CoreModelId.llama_guard_3_8b.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
llama_stack/providers/remote/inference/tgi/__init__.py
@@ -7,13 +7,14 @@
 from typing import Union
 
 from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
-from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter
 
 
 async def get_adapter_impl(
     config: Union[InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig],
     _deps,
 ):
+    from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter
+
     if isinstance(config, TGIImplConfig):
         impl = TGIAdapter()
     elif isinstance(config, InferenceAPIImplConfig):
llama_stack/providers/remote/inference/together/models.py (new file, +49)
@@ -0,0 +1,49 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.2-3B-Instruct-Turbo",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Meta-Llama-Guard-3-8B",
+        CoreModelId.llama_guard_3_8b.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
+        CoreModelId.llama_guard_3_11b_vision.value,
+    ),
+]
llama_stack/providers/remote/inference/together/together.py
@@ -28,10 +28,8 @@ from llama_stack.apis.inference import (
     ToolPromptFormat,
 )
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
-from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     convert_message_to_openai_dict,
@@ -50,45 +48,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import TogetherImplConfig
-
-MODEL_ALIASES = [
-    build_model_alias(
-        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.2-3B-Instruct-Turbo",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.3-70B-Instruct-Turbo",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Meta-Llama-Guard-3-8B",
-        CoreModelId.llama_guard_3_8b.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
-        CoreModelId.llama_guard_3_11b_vision.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
@@ -7,7 +7,6 @@
 from pydantic import BaseModel
 
 from .config import ModelContextProtocolConfig
-from .model_context_protocol import ModelContextProtocolToolRuntimeImpl
 
 
 class ModelContextProtocolToolProviderDataValidator(BaseModel):
@@ -15,6 +14,8 @@ class ModelContextProtocolToolProviderDataValidator(BaseModel):
 
 
 async def get_adapter_impl(config: ModelContextProtocolConfig, _deps):
+    from .model_context_protocol import ModelContextProtocolToolRuntimeImpl
+
     impl = ModelContextProtocolToolRuntimeImpl(config)
     await impl.initialize()
     return impl
llama_stack/scripts/distro_codegen.py
@@ -23,6 +23,22 @@ from llama_stack.distribution.build import (
 REPO_ROOT = Path(__file__).parent.parent.parent
 
 
+class ChangedPathTracker:
+    """Track a list of paths we may have changed."""
+
+    def __init__(self):
+        self._changed_paths = []
+
+    def add_paths(self, *paths):
+        for path in paths:
+            path = str(path)
+            if path not in self._changed_paths:
+                self._changed_paths.append(path)
+
+    def changed_paths(self):
+        return self._changed_paths
+
+
 def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
     """Find immediate subdirectories in the templates folder."""
     if not templates_dir.exists():
@@ -31,7 +47,7 @@ def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
     return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
 
 
-def process_template(template_dir: Path, progress) -> None:
+def process_template(template_dir: Path, progress, change_tracker: ChangedPathTracker) -> None:
     """Process a single template directory."""
    progress.print(f"Processing {template_dir.name}")
 
@@ -44,9 +60,12 @@ def process_template(template_dir: Path, progress) -> None:
         if template_func := getattr(module, "get_distribution_template", None):
             template = template_func()
 
+            yaml_output_dir = REPO_ROOT / "llama_stack" / "templates" / template.name
+            doc_output_dir = REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro"
+            change_tracker.add_paths(yaml_output_dir, doc_output_dir)
             template.save_distribution(
-                yaml_output_dir=REPO_ROOT / "llama_stack" / "templates" / template.name,
-                doc_output_dir=REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro",
+                yaml_output_dir=yaml_output_dir,
+                doc_output_dir=doc_output_dir,
             )
         else:
             progress.print(f"[yellow]Warning: {template_dir.name} has no get_distribution_template function")
@@ -56,14 +75,19 @@ def process_template(template_dir: Path, progress) -> None:
         raise e
 
 
-def check_for_changes() -> bool:
+def check_for_changes(change_tracker: ChangedPathTracker) -> bool:
     """Check if there are any uncommitted changes."""
-    result = subprocess.run(
-        ["git", "diff", "--exit-code"],
-        cwd=REPO_ROOT,
-        capture_output=True,
-    )
-    return result.returncode != 0
+    has_changes = False
+    for path in change_tracker.changed_paths():
+        result = subprocess.run(
+            ["git", "diff", "--exit-code", path],
+            cwd=REPO_ROOT,
+            capture_output=True,
+        )
+        if result.returncode != 0:
+            print(f"Change detected in '{path}'.", file=sys.stderr)
+            has_changes = True
+    return has_changes
 
 
 def collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]:
@@ -83,7 +107,7 @@ def collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]:
     return None, []
 
 
-def generate_dependencies_file():
+def generate_dependencies_file(change_tracker: ChangedPathTracker):
     templates_dir = REPO_ROOT / "llama_stack" / "templates"
     distribution_deps = {}
 
@@ -93,12 +117,14 @@ def generate_dependencies_file():
         distribution_deps[name] = deps
 
     deps_file = REPO_ROOT / "distributions" / "dependencies.json"
+    change_tracker.add_paths(deps_file)
     with open(deps_file, "w") as f:
         f.write(json.dumps(distribution_deps, indent=2) + "\n")
 
 
 def main():
     templates_dir = REPO_ROOT / "llama_stack" / "templates"
+    change_tracker = ChangedPathTracker()
 
     with Progress(
         SpinnerColumn(),
@@ -108,7 +134,7 @@ def main():
         task = progress.add_task("Processing distribution templates...", total=len(template_dirs))
 
         # Create a partial function with the progress bar
-        process_func = partial(process_template, progress=progress)
+        process_func = partial(process_template, progress=progress, change_tracker=change_tracker)
 
         # Process templates in parallel
         with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -116,9 +142,9 @@ def main():
             list(executor.map(process_func, template_dirs))
             progress.update(task, advance=len(template_dirs))
 
-    generate_dependencies_file()
+    generate_dependencies_file(change_tracker)
 
-    if check_for_changes():
+    if check_for_changes(change_tracker):
         print(
             "Distribution template changes detected. Please commit the changes.",
             file=sys.stderr,
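The net effect of ChangedPathTracker is that the dirty-check is scoped to the paths the generator actually wrote. A standalone sketch of the same idea (the function name is hypothetical; the git invocation mirrors the diff above):

import subprocess
import sys


def paths_have_uncommitted_changes(paths: list[str], repo_root: str) -> bool:
    """Return True if any of the given paths has uncommitted changes."""
    dirty = False
    for path in paths:
        # `git diff --exit-code <path>` exits non-zero when that path differs
        # from the index, without scanning the rest of the working tree.
        result = subprocess.run(
            ["git", "diff", "--exit-code", path],
            cwd=repo_root,
            capture_output=True,
        )
        if result.returncode != 0:
            print(f"Change detected in '{path}'.", file=sys.stderr)
            dirty = True
    return dirty

Unrelated unstaged changes elsewhere in the tree no longer trip the check, which is exactly the failure mode the commit message describes.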
llama_stack/templates/bedrock/bedrock.py
@@ -10,7 +10,7 @@ from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
-from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES
+from llama_stack.providers.remote.inference.bedrock.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
llama_stack/templates/cerebras/cerebras.py
@@ -14,7 +14,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
-from llama_stack.providers.remote.inference.cerebras.cerebras import model_aliases
+from llama_stack.providers.remote.inference.cerebras.models import model_aliases
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
llama_stack/templates/fireworks/fireworks.py
@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
-from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
+from llama_stack.providers.remote.inference.fireworks.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
llama_stack/templates/nvidia/nvidia.py
@@ -9,7 +9,7 @@ from pathlib
 from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
-from llama_stack.providers.remote.inference.nvidia.nvidia import _MODEL_ALIASES
+from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
llama_stack/templates/ollama/build.yaml
@@ -6,7 +6,6 @@ distribution_spec:
     - remote::ollama
   vector_io:
     - inline::faiss
-    - inline::sqlite_vec
     - remote::chromadb
     - remote::pgvector
   safety:
Ollama distribution run config (generated)
@@ -20,6 +20,13 @@ providers:
     provider_type: inline::sentence-transformers
     config: {}
   vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
   - provider_id: faiss
     provider_type: inline::faiss
     config:
Ollama distribution run config (generated; removes a duplicated line)
@@ -34,7 +34,6 @@ providers:
       type: sqlite
       namespace: null
       db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
llama_stack/templates/sambanova/sambanova.py
@@ -14,7 +14,7 @@ from llama_stack.distribution.datatypes import (
 )
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
-from llama_stack.providers.remote.inference.sambanova.sambanova import MODEL_ALIASES
+from llama_stack.providers.remote.inference.sambanova.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
llama_stack/templates/together/together.py
@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
-from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
+from llama_stack.providers.remote.inference.together.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings