diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9b8b9a8df..8c5510b27 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -75,19 +75,19 @@ repos:
 #       - id: markdown-link-check
 #         args: ['--quiet']
 
-# - repo: local
-#   hooks:
-#     - id: distro-codegen
-#       name: Distribution Template Codegen
-#       additional_dependencies:
-#         - rich
-#         - pydantic
-#       entry: python -m llama_stack.scripts.distro_codegen
-#       language: python
-#       pass_filenames: false
-#       require_serial: true
-#       files: ^llama_stack/templates/.*$
-#       stages: [manual]
+- repo: local
+  hooks:
+    - id: distro-codegen
+      name: Distribution Template Codegen
+      additional_dependencies:
+        - rich
+        - pydantic
+        - uv==0.6.0
+      entry: uv run python -m llama_stack.scripts.distro_codegen
+      language: python
+      pass_filenames: false
+      require_serial: true
+      files: ^llama_stack/templates/.*$
 
 ci:
     autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md
index 64c9f8c19..14f004926 100644
--- a/docs/source/distributions/self_hosted_distro/bedrock.md
+++ b/docs/source/distributions/self_hosted_distro/bedrock.md
@@ -61,7 +61,8 @@ docker run \
   --port $LLAMA_STACK_PORT \
   --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
   --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN
+  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
+  --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION
 ```
 
 ### Via Conda
@@ -72,5 +73,6 @@ llama stack run ./run.yaml \
   --port $LLAMA_STACK_PORT \
   --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
   --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN
+  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
+  --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION
 ```
diff --git a/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/llama_stack/providers/inline/tool_runtime/rag/__init__.py
index 542872091..15118c9df 100644
--- a/llama_stack/providers/inline/tool_runtime/rag/__init__.py
+++ b/llama_stack/providers/inline/tool_runtime/rag/__init__.py
@@ -9,10 +9,11 @@ from typing import Any, Dict
 from llama_stack.providers.datatypes import Api
 
 from .config import RagToolRuntimeConfig
-from .memory import MemoryToolRuntimeImpl
 
 
 async def get_provider_impl(config: RagToolRuntimeConfig, deps: Dict[str, Any]):
+    from .memory import MemoryToolRuntimeImpl
+
     impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference])
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py
index e896f0597..a706d4304 100644
--- a/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -27,12 +27,10 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
 from llama_stack.providers.utils.bedrock.client import create_bedrock_client
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
@@ -47,20 +45,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     interleaved_content_as_str,
 )
 
-MODEL_ALIASES = [
-    build_model_alias(
-        "meta.llama3-1-8b-instruct-v1:0",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "meta.llama3-1-70b-instruct-v1:0",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta.llama3-1-405b-instruct-v1:0",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class BedrockInferenceAdapter(ModelRegistryHelper, Inference):
diff --git a/llama_stack/providers/remote/inference/bedrock/models.py b/llama_stack/providers/remote/inference/bedrock/models.py
new file mode 100644
index 000000000..b629e05d5
--- /dev/null
+++ b/llama_stack/providers/remote/inference/bedrock/models.py
@@ -0,0 +1,25 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "meta.llama3-1-8b-instruct-v1:0",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta.llama3-1-70b-instruct-v1:0",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta.llama3-1-405b-instruct-v1:0",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+]
diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py
index 1ce267e8d..0d8824fd2 100644
--- a/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -26,10 +26,9 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.models.llama.datatypes import CoreModelId, TopKSamplingStrategy
+from llama_stack.models.llama.datatypes import TopKSamplingStrategy
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     get_sampling_options,
@@ -44,17 +43,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import CerebrasImplConfig
-
-model_aliases = [
-    build_model_alias(
-        "llama3.1-8b",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "llama-3.3-70b",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-]
+from .models import model_aliases
 
 
 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py
new file mode 100644
index 000000000..03ffeb492
--- /dev/null
+++ b/llama_stack/providers/remote/inference/cerebras/models.py
@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+model_aliases = [
+    build_model_alias(
+        "llama3.1-8b",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "llama-3.3-70b",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+]
diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py
index acf37b248..3b834673d 100644
--- a/llama_stack/providers/remote/inference/fireworks/fireworks.py
+++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -29,10 +29,8 @@ from llama_stack.apis.inference import (
     ToolPromptFormat,
 )
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
-from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     convert_message_to_openai_dict,
@@ -51,49 +49,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import FireworksImplConfig
-
-MODEL_ALIASES = [
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p1-8b-instruct",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p1-70b-instruct",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p1-405b-instruct",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-1b-instruct",
-        CoreModelId.llama3_2_1b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-3b-instruct",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p3-70b-instruct",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-guard-3-8b",
-        CoreModelId.llama_guard_3_8b.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-guard-3-11b-vision",
-        CoreModelId.llama_guard_3_11b_vision.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py
new file mode 100644
index 000000000..14de585d4
--- /dev/null
+++ b/llama_stack/providers/remote/inference/fireworks/models.py
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p1-8b-instruct",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p1-70b-instruct",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p1-405b-instruct",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-1b-instruct",
+        CoreModelId.llama3_2_1b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-3b-instruct",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p3-70b-instruct",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-guard-3-8b",
+        CoreModelId.llama_guard_3_8b.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-guard-3-11b-vision",
+        CoreModelId.llama_guard_3_11b_vision.value,
+    ),
+]
diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py
new file mode 100644
index 000000000..1d9b575d4
--- /dev/null
+++ b/llama_stack/providers/remote/inference/nvidia/models.py
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+_MODEL_ALIASES = [
+    build_model_alias(
+        "meta/llama3-8b-instruct",
+        CoreModelId.llama3_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama3-70b-instruct",
+        CoreModelId.llama3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.1-8b-instruct",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.1-70b-instruct",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.1-405b-instruct",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-1b-instruct",
+        CoreModelId.llama3_2_1b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-3b-instruct",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-11b-vision-instruct",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-90b-vision-instruct",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    # TODO(mf): how do we handle Nemotron models?
+ # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct", +] diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 8e67333af..0da617858 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import logging import warnings from typing import AsyncIterator, List, Optional, Union @@ -26,19 +25,14 @@ from llama_stack.apis.inference import ( ToolChoice, ToolConfig, ) -from llama_stack.models.llama.datatypes import ( - CoreModelId, - SamplingParams, - ToolDefinition, - ToolPromptFormat, -) +from llama_stack.models.llama.datatypes import SamplingParams, ToolDefinition, ToolPromptFormat from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, ) from llama_stack.providers.utils.inference.prompt_adapter import content_has_media from . import NVIDIAConfig +from .models import _MODEL_ALIASES from .openai_utils import ( convert_chat_completion_request, convert_completion_request, @@ -49,49 +43,6 @@ from .openai_utils import ( ) from .utils import _is_nvidia_hosted, check_health -logger = logging.getLogger(__name__) - -_MODEL_ALIASES = [ - build_model_alias( - "meta/llama3-8b-instruct", - CoreModelId.llama3_8b_instruct.value, - ), - build_model_alias( - "meta/llama3-70b-instruct", - CoreModelId.llama3_70b_instruct.value, - ), - build_model_alias( - "meta/llama-3.1-8b-instruct", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_model_alias( - "meta/llama-3.1-70b-instruct", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_model_alias( - "meta/llama-3.1-405b-instruct", - CoreModelId.llama3_1_405b_instruct.value, - ), - build_model_alias( - "meta/llama-3.2-1b-instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), - build_model_alias( - "meta/llama-3.2-3b-instruct", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_model_alias( - "meta/llama-3.2-11b-vision-instruct", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_model_alias( - "meta/llama-3.2-90b-vision-instruct", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - # TODO(mf): how do we handle Nemotron models? 
- # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct", -] - class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): def __init__(self, config: NVIDIAConfig) -> None: diff --git a/llama_stack/providers/remote/inference/sambanova/__init__.py b/llama_stack/providers/remote/inference/sambanova/__init__.py index ccf4bf1cb..3e682e69c 100644 --- a/llama_stack/providers/remote/inference/sambanova/__init__.py +++ b/llama_stack/providers/remote/inference/sambanova/__init__.py @@ -7,7 +7,6 @@ from pydantic import BaseModel from .config import SambaNovaImplConfig -from .sambanova import SambaNovaInferenceAdapter class SambaNovaProviderDataValidator(BaseModel): @@ -15,6 +14,8 @@ class SambaNovaProviderDataValidator(BaseModel): async def get_adapter_impl(config: SambaNovaImplConfig, _deps): + from .sambanova import SambaNovaInferenceAdapter + assert isinstance(config, SambaNovaImplConfig), f"Unexpected config type: {type(config)}" impl = SambaNovaInferenceAdapter(config) await impl.initialize() diff --git a/llama_stack/providers/remote/inference/sambanova/models.py b/llama_stack/providers/remote/inference/sambanova/models.py new file mode 100644 index 000000000..27a4a149e --- /dev/null +++ b/llama_stack/providers/remote/inference/sambanova/models.py @@ -0,0 +1,49 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.models.llama.datatypes import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + build_model_alias, +) + +MODEL_ALIASES = [ + build_model_alias( + "Meta-Llama-3.1-8B-Instruct", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.1-70B-Instruct", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.1-405B-Instruct", + CoreModelId.llama3_1_405b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.2-1B-Instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.2-3B-Instruct", + CoreModelId.llama3_2_3b_instruct.value, + ), + build_model_alias( + "Meta-Llama-3.3-70B-Instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), + build_model_alias( + "Llama-3.2-11B-Vision-Instruct", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), + build_model_alias( + "Llama-3.2-90B-Vision-Instruct", + CoreModelId.llama3_2_90b_vision_instruct.value, + ), + build_model_alias( + "Meta-Llama-Guard-3-8B", + CoreModelId.llama_guard_3_8b.value, + ), +] diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py index b906e0dcb..9b3562870 100644 --- a/llama_stack/providers/remote/inference/sambanova/sambanova.py +++ b/llama_stack/providers/remote/inference/sambanova/sambanova.py @@ -18,14 +18,12 @@ from llama_stack.apis.common.content_types import ( ) from llama_stack.apis.inference import * # noqa: F403 from llama_stack.models.llama.datatypes import ( - CoreModelId, GreedySamplingStrategy, TopKSamplingStrategy, TopPSamplingStrategy, ) from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, - build_model_alias, ) from llama_stack.providers.utils.inference.openai_compat import ( process_chat_completion_stream_response, @@ -35,45 +33,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( ) from .config import SambaNovaImplConfig - -MODEL_ALIASES = [ - 
build_model_alias( - "Meta-Llama-3.1-8B-Instruct", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.1-70B-Instruct", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.1-405B-Instruct", - CoreModelId.llama3_1_405b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.2-1B-Instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.2-3B-Instruct", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_model_alias( - "Meta-Llama-3.3-70B-Instruct", - CoreModelId.llama3_3_70b_instruct.value, - ), - build_model_alias( - "Llama-3.2-11B-Vision-Instruct", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_model_alias( - "Llama-3.2-90B-Vision-Instruct", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - build_model_alias( - "Meta-Llama-Guard-3-8B", - CoreModelId.llama_guard_3_8b.value, - ), -] +from .models import MODEL_ALIASES class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference): diff --git a/llama_stack/providers/remote/inference/tgi/__init__.py b/llama_stack/providers/remote/inference/tgi/__init__.py index 451650323..834e51324 100644 --- a/llama_stack/providers/remote/inference/tgi/__init__.py +++ b/llama_stack/providers/remote/inference/tgi/__init__.py @@ -7,13 +7,14 @@ from typing import Union from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig -from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter async def get_adapter_impl( config: Union[InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig], _deps, ): + from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter + if isinstance(config, TGIImplConfig): impl = TGIAdapter() elif isinstance(config, InferenceAPIImplConfig): diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py new file mode 100644 index 000000000..87d282ea5 --- /dev/null +++ b/llama_stack/providers/remote/inference/together/models.py @@ -0,0 +1,49 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.2-3B-Instruct-Turbo",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Meta-Llama-Guard-3-8B",
+        CoreModelId.llama_guard_3_8b.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
+        CoreModelId.llama_guard_3_11b_vision.value,
+    ),
+]
diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py
index 054501da8..7a37ff616 100644
--- a/llama_stack/providers/remote/inference/together/together.py
+++ b/llama_stack/providers/remote/inference/together/together.py
@@ -28,10 +28,8 @@ from llama_stack.apis.inference import (
     ToolPromptFormat,
 )
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
-from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     convert_message_to_openai_dict,
@@ -50,45 +48,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import TogetherImplConfig
-
-MODEL_ALIASES = [
-    build_model_alias(
-        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.2-3B-Instruct-Turbo",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.3-70B-Instruct-Turbo",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Meta-Llama-Guard-3-8B",
-        CoreModelId.llama_guard_3_8b.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
-        CoreModelId.llama_guard_3_11b_vision.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py b/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
index 2ddf7b4fe..fb1f558e5 100644
--- a/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
+++ b/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
@@ -7,7 +7,6 @@
 from pydantic import BaseModel
 
 from .config import ModelContextProtocolConfig
-from .model_context_protocol import ModelContextProtocolToolRuntimeImpl
 
 
 class ModelContextProtocolToolProviderDataValidator(BaseModel):
@@ -15,6 +14,8 @@ class ModelContextProtocolToolProviderDataValidator(BaseModel):
 
 
 async def get_adapter_impl(config: ModelContextProtocolConfig, _deps):
+    from .model_context_protocol import ModelContextProtocolToolRuntimeImpl
+
     impl = ModelContextProtocolToolRuntimeImpl(config)
     await impl.initialize()
     return impl
diff --git a/llama_stack/scripts/distro_codegen.py b/llama_stack/scripts/distro_codegen.py
index 825a039ef..1c44b4625 100644
--- a/llama_stack/scripts/distro_codegen.py
+++ b/llama_stack/scripts/distro_codegen.py
@@ -23,6 +23,22 @@ from llama_stack.distribution.build import (
 REPO_ROOT = Path(__file__).parent.parent.parent
 
 
+class ChangedPathTracker:
+    """Track a list of paths we may have changed."""
+
+    def __init__(self):
+        self._changed_paths = []
+
+    def add_paths(self, *paths):
+        for path in paths:
+            path = str(path)
+            if path not in self._changed_paths:
+                self._changed_paths.append(path)
+
+    def changed_paths(self):
+        return self._changed_paths
+
+
 def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
     """Find immediate subdirectories in the templates folder."""
     if not templates_dir.exists():
@@ -31,7 +47,7 @@ def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
     return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
 
 
-def process_template(template_dir: Path, progress) -> None:
+def process_template(template_dir: Path, progress, change_tracker: ChangedPathTracker) -> None:
     """Process a single template directory."""
     progress.print(f"Processing {template_dir.name}")
 
@@ -44,9 +60,12 @@
         if template_func := getattr(module, "get_distribution_template", None):
             template = template_func()
 
+            yaml_output_dir = REPO_ROOT / "llama_stack" / "templates" / template.name
+            doc_output_dir = REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro"
+            change_tracker.add_paths(yaml_output_dir, doc_output_dir)
             template.save_distribution(
-                yaml_output_dir=REPO_ROOT / "llama_stack" / "templates" / template.name,
-                doc_output_dir=REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro",
+                yaml_output_dir=yaml_output_dir,
+                doc_output_dir=doc_output_dir,
             )
         else:
             progress.print(f"[yellow]Warning: {template_dir.name} has no get_distribution_template function")
@@ -56,14 +75,19 @@
         raise e
 
 
-def check_for_changes() -> bool:
+def check_for_changes(change_tracker: ChangedPathTracker) -> bool:
     """Check if there are any uncommitted changes."""
-    result = subprocess.run(
-        ["git", "diff", "--exit-code"],
-        cwd=REPO_ROOT,
-        capture_output=True,
-    )
-    return result.returncode != 0
+    has_changes = False
+    for path in change_tracker.changed_paths():
+        result = subprocess.run(
+            ["git", "diff", "--exit-code", path],
+            cwd=REPO_ROOT,
+            capture_output=True,
+        )
+        if result.returncode != 0:
+            print(f"Change detected in '{path}'.", file=sys.stderr)
+            has_changes = True
+    return has_changes
 
 
 def collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]:
@@ -83,7 +107,7 @@ def collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]:
     return None, []
 
 
-def generate_dependencies_file():
+def generate_dependencies_file(change_tracker: ChangedPathTracker):
     templates_dir = REPO_ROOT / "llama_stack" / "templates"
     distribution_deps = {}
 
@@ -93,12 +117,14 @@
             distribution_deps[name] = deps
 
     deps_file = REPO_ROOT / "distributions" / "dependencies.json"
+    change_tracker.add_paths(deps_file)
     with open(deps_file, "w") as f:
         f.write(json.dumps(distribution_deps, indent=2) + "\n")
 
 
 def main():
     templates_dir = REPO_ROOT / "llama_stack" / "templates"
+    change_tracker = ChangedPathTracker()
 
     with Progress(
         SpinnerColumn(),
@@ -108,7 +134,7 @@
         task = progress.add_task("Processing distribution templates...", total=len(template_dirs))
 
         # Create a partial function with the progress bar
-        process_func = partial(process_template, progress=progress)
+        process_func = partial(process_template, progress=progress, change_tracker=change_tracker)
 
         # Process templates in parallel
         with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -116,9 +142,9 @@
             list(executor.map(process_func, template_dirs))
             progress.update(task, advance=len(template_dirs))
 
-    generate_dependencies_file()
+    generate_dependencies_file(change_tracker)
 
-    if check_for_changes():
+    if check_for_changes(change_tracker):
         print(
             "Distribution template changes detected. Please commit the changes.",
             file=sys.stderr,
diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py
index 0b294824d..550269f61 100644
--- a/llama_stack/templates/bedrock/bedrock.py
+++ b/llama_stack/templates/bedrock/bedrock.py
@@ -10,7 +10,7 @@ from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
-from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES
+from llama_stack.providers.remote.inference.bedrock.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py
index 4f6d0c8f3..5f3921102 100644
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@@ -14,7 +14,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
-from llama_stack.providers.remote.inference.cerebras.cerebras import model_aliases
+from llama_stack.providers.remote.inference.cerebras.models import model_aliases
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py
index a6809fef6..8d91c223d 100644
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
-from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
+from llama_stack.providers.remote.inference.fireworks.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py
index ee22b5555..6bca48e99 100644
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -9,7 +9,7 @@ from pathlib import Path
 from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
-from llama_stack.providers.remote.inference.nvidia.nvidia import _MODEL_ALIASES
+from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
index 48960c5ba..0fee6808c 100644
--- a/llama_stack/templates/ollama/build.yaml
+++ b/llama_stack/templates/ollama/build.yaml
@@ -6,7 +6,6 @@ distribution_spec:
     - remote::ollama
     vector_io:
     - inline::faiss
-    - inline::sqlite_vec
    - remote::chromadb
     - remote::pgvector
     safety:
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index 9d5bfc7a0..4ce64cf59 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -20,6 +20,13 @@ providers:
     provider_type: inline::sentence-transformers
     config: {}
   vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
   - provider_id: faiss
     provider_type: inline::faiss
     config:
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 9ac1f3267..b4982f8e2 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -34,7 +34,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py
index c7a9428af..dac7346a7 100644
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@@ -14,7 +14,7 @@
 )
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
-from llama_stack.providers.remote.inference.sambanova.sambanova import MODEL_ALIASES
+from llama_stack.providers.remote.inference.sambanova.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py
index f7b18e32a..ef6847fb2 100644
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
-from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
+from llama_stack.providers.remote.inference.together.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
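
A note on the recurring pattern in this patch: each provider's alias table now lives in a dependency-light models.py, so the distribution templates import MODEL_ALIASES without importing the adapter module (and its heavy SDK dependencies). Usage is unchanged; a minimal sketch follows, assuming the alias objects built by build_model_alias expose provider_model_id and llama_model fields (an assumption about model_registry.py, not confirmed by this diff):

    from llama_stack.providers.remote.inference.fireworks.models import MODEL_ALIASES

    # Each alias maps a provider-specific model id to a core Llama model
    # descriptor; field names here are assumed, not taken from this patch.
    for alias in MODEL_ALIASES:
        print(f"{alias.provider_model_id} -> {alias.llama_model}")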
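The __init__.py changes for the rag, sambanova, tgi, and model_context_protocol providers all apply the same deferred-import idea: the implementation class is imported inside get_adapter_impl / get_provider_impl rather than at module top level, so importing the package for registration or codegen stays cheap. A minimal sketch of the pattern, with hypothetical heavy_adapter / HeavyAdapter names standing in for the real modules:

    async def get_adapter_impl(config, _deps):
        # Deferred import: the adapter's third-party dependencies are only
        # pulled in when a stack actually instantiates this provider.
        from .heavy_adapter import HeavyAdapter  # hypothetical module/class

        impl = HeavyAdapter(config)
        await impl.initialize()
        return impl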
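Finally, the distro_codegen.py change narrows the post-generation dirty-tree check: instead of running git diff --exit-code across the whole repository, the script diffs only the paths it actually wrote, so unrelated local changes no longer fail the pre-commit hook. A sketch of that flow, reusing the ChangedPathTracker added above (assumes it runs from the repo root with git on PATH; the dependencies.json path is just an example):

    import subprocess

    from llama_stack.scripts.distro_codegen import ChangedPathTracker

    tracker = ChangedPathTracker()
    tracker.add_paths("distributions/dependencies.json")

    for path in tracker.changed_paths():
        # `git diff --exit-code <path>` exits non-zero iff the path has
        # uncommitted changes; output is captured rather than printed.
        result = subprocess.run(["git", "diff", "--exit-code", path], capture_output=True)
        if result.returncode != 0:
            print(f"Change detected in '{path}'.")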