Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-08-10 04:08:31 +00:00
fix: Get distro_codegen.py working with default deps
Before this change, `distro_codegen.py` would only work if the user manually installed multiple provider-specific dependencies. Now, users can run `distro_codegen.py` without any provider-specific dependencies, because we avoid importing the entire provider implementations just to get the config needed to build the provider template.

Concretely, this mostly means moving the MODEL_ALIASES/model_aliases/_MODEL_ALIASES definitions into a new models.py module within the provider implementation, for those providers that require additional dependencies. It also meant moving a couple of imports from the top level to inside `get_adapter_impl` for some providers, following the pattern already used by multiple existing providers.

To ensure we don't regress and accidentally add new imports that cause distro_codegen.py to fail, the stubbed-in pre-commit hook for distro_codegen.py was uncommented and slightly tweaked: it now runs via `uv run python ...` so that it uses only the project's default dependencies, and it runs automatically instead of manually.

Lastly, this updates distro_codegen.py itself to keep track of the paths it may have changed and to `git diff` only those paths when checking for changed files, instead of diffing the entire working tree. The latter was overly broad and required the user to have no other unstaged changes in their working tree, even when those changes were unrelated to generated code. Now it only flags uncommitted changes on paths distro_codegen.py actually writes to.

Our generated code was also out of date, presumably because of these issues, so this commit includes some updates to the generated code purely to bring it back in sync; the pre-commit hook now enforces that it stays up to date.

(Closes #1122)

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
parent: 743f434860
commit: c7fffa2796

28 changed files with 334 additions and 240 deletions
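The recurring pattern in the provider changes below is deferring provider-specific imports until an adapter is actually constructed. A minimal sketch of that pattern (module and class names here are hypothetical, for illustration only; the real diffs follow):

from typing import Any, Dict

from .config import ExampleImplConfig  # config modules stay lightweight (pydantic only)


async def get_adapter_impl(config: ExampleImplConfig, _deps: Dict[str, Any]):
    # The adapter module (and any heavy provider SDK it pulls in) is imported
    # only when an adapter instance is needed, so tooling like distro_codegen.py
    # can import this package for its config without the SDK installed.
    from .example import ExampleInferenceAdapter

    impl = ExampleInferenceAdapter(config)
    await impl.initialize()
    return impl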
.pre-commit-config.yaml
@@ -75,19 +75,19 @@ repos:
 #     - id: markdown-link-check
 #       args: ['--quiet']
 
-# - repo: local
-#   hooks:
-#     - id: distro-codegen
-#       name: Distribution Template Codegen
-#       additional_dependencies:
-#         - rich
-#         - pydantic
-#       entry: python -m llama_stack.scripts.distro_codegen
-#       language: python
-#       pass_filenames: false
-#       require_serial: true
-#       files: ^llama_stack/templates/.*$
-#       stages: [manual]
+- repo: local
+  hooks:
+    - id: distro-codegen
+      name: Distribution Template Codegen
+      additional_dependencies:
+        - rich
+        - pydantic
+        - uv==0.6.0
+      entry: uv run python -m llama_stack.scripts.distro_codegen
+      language: python
+      pass_filenames: false
+      require_serial: true
+      files: ^llama_stack/templates/.*$
 
 ci:
     autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
Bedrock distribution docs
@@ -61,7 +61,8 @@ docker run \
   --port $LLAMA_STACK_PORT \
   --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
   --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN
+  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
+  --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION
 ```
 
 ### Via Conda
@@ -72,5 +73,6 @@ llama stack run ./run.yaml \
   --port $LLAMA_STACK_PORT \
   --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
   --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN
+  --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
+  --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION
 ```
RAG tool runtime provider __init__.py
@@ -9,10 +9,11 @@ from typing import Any, Dict
 from llama_stack.providers.datatypes import Api
 
 from .config import RagToolRuntimeConfig
-from .memory import MemoryToolRuntimeImpl
 
 
 async def get_provider_impl(config: RagToolRuntimeConfig, deps: Dict[str, Any]):
+    from .memory import MemoryToolRuntimeImpl
+
     impl = MemoryToolRuntimeImpl(config, deps[Api.vector_io], deps[Api.inference])
     await impl.initialize()
     return impl
llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -27,12 +27,10 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
 from llama_stack.providers.utils.bedrock.client import create_bedrock_client
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
@@ -47,20 +45,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     interleaved_content_as_str,
 )
 
-MODEL_ALIASES = [
-    build_model_alias(
-        "meta.llama3-1-8b-instruct-v1:0",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "meta.llama3-1-70b-instruct-v1:0",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta.llama3-1-405b-instruct-v1:0",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class BedrockInferenceAdapter(ModelRegistryHelper, Inference):
llama_stack/providers/remote/inference/bedrock/models.py (new file, +25)
@@ -0,0 +1,25 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "meta.llama3-1-8b-instruct-v1:0",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta.llama3-1-70b-instruct-v1:0",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta.llama3-1-405b-instruct-v1:0",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+]
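The new models.py modules exist so the alias tables can be imported without pulling in the provider SDK (boto3, in Bedrock's case). Conceptually, each table maps a provider-specific model ID to the canonical Llama model descriptor. A rough self-contained sketch of that lookup (an illustration only, not llama-stack's actual ModelRegistryHelper API; the descriptor strings are assumptions):

# Hypothetical stand-in for the alias resolution ModelRegistryHelper performs.
BEDROCK_TO_CANONICAL = {
    "meta.llama3-1-8b-instruct-v1:0": "Llama3.1-8B-Instruct",
    "meta.llama3-1-70b-instruct-v1:0": "Llama3.1-70B-Instruct",
    "meta.llama3-1-405b-instruct-v1:0": "Llama3.1-405B-Instruct",
}


def canonical_model_id(provider_model_id: str) -> str:
    """Resolve a provider model ID to its canonical Llama descriptor."""
    if provider_model_id not in BEDROCK_TO_CANONICAL:
        raise ValueError(f"unknown Bedrock model: {provider_model_id}")
    return BEDROCK_TO_CANONICAL[provider_model_id]

Keeping this mapping in a dependency-free module is the whole point: distro_codegen.py can read it while building templates without the SDK installed.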
llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -26,10 +26,9 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.models.llama.datatypes import CoreModelId, TopKSamplingStrategy
+from llama_stack.models.llama.datatypes import TopKSamplingStrategy
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     get_sampling_options,
@@ -44,17 +43,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import CerebrasImplConfig
-
-model_aliases = [
-    build_model_alias(
-        "llama3.1-8b",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "llama-3.3-70b",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-]
+from .models import model_aliases
 
 
 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
llama_stack/providers/remote/inference/cerebras/models.py (new file, +21)
@@ -0,0 +1,21 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+model_aliases = [
+    build_model_alias(
+        "llama3.1-8b",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "llama-3.3-70b",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+]
llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -29,10 +29,8 @@ from llama_stack.apis.inference import (
     ToolPromptFormat,
 )
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
-from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     convert_message_to_openai_dict,
@@ -51,49 +49,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import FireworksImplConfig
-
-MODEL_ALIASES = [
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p1-8b-instruct",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p1-70b-instruct",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p1-405b-instruct",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-1b-instruct",
-        CoreModelId.llama3_2_1b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-3b-instruct",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-v3p3-70b-instruct",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-guard-3-8b",
-        CoreModelId.llama_guard_3_8b.value,
-    ),
-    build_model_alias(
-        "accounts/fireworks/models/llama-guard-3-11b-vision",
-        CoreModelId.llama_guard_3_11b_vision.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
llama_stack/providers/remote/inference/fireworks/models.py (new file, +53)
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p1-8b-instruct",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p1-70b-instruct",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p1-405b-instruct",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-1b-instruct",
+        CoreModelId.llama3_2_1b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-3b-instruct",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-v3p3-70b-instruct",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-guard-3-8b",
+        CoreModelId.llama_guard_3_8b.value,
+    ),
+    build_model_alias(
+        "accounts/fireworks/models/llama-guard-3-11b-vision",
+        CoreModelId.llama_guard_3_11b_vision.value,
+    ),
+]
llama_stack/providers/remote/inference/nvidia/models.py (new file, +51)
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+_MODEL_ALIASES = [
+    build_model_alias(
+        "meta/llama3-8b-instruct",
+        CoreModelId.llama3_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama3-70b-instruct",
+        CoreModelId.llama3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.1-8b-instruct",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.1-70b-instruct",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.1-405b-instruct",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-1b-instruct",
+        CoreModelId.llama3_2_1b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-3b-instruct",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-11b-vision-instruct",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "meta/llama-3.2-90b-vision-instruct",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    # TODO(mf): how do we handle Nemotron models?
+    # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct",
+]
llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -25,14 +25,14 @@ from llama_stack.apis.inference import (
     ToolChoice,
     ToolConfig,
 )
-from llama_stack.models.llama.datatypes import CoreModelId, SamplingParams, ToolDefinition, ToolPromptFormat
+from llama_stack.models.llama.datatypes import SamplingParams, ToolDefinition, ToolPromptFormat
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.prompt_adapter import content_has_media
 
 from . import NVIDIAConfig
+from .models import _MODEL_ALIASES
 from .openai_utils import (
     convert_chat_completion_request,
     convert_completion_request,
@@ -43,47 +43,6 @@ from .openai_utils import (
 )
 from .utils import _is_nvidia_hosted, check_health
 
-_MODEL_ALIASES = [
-    build_model_alias(
-        "meta/llama3-8b-instruct",
-        CoreModelId.llama3_8b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama3-70b-instruct",
-        CoreModelId.llama3_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.1-8b-instruct",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.1-70b-instruct",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.1-405b-instruct",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.2-1b-instruct",
-        CoreModelId.llama3_2_1b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.2-3b-instruct",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.2-11b-vision-instruct",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "meta/llama-3.2-90b-vision-instruct",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    # TODO(mf): how do we handle Nemotron models?
-    # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct",
-]
-
-
 class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: NVIDIAConfig) -> None:
llama_stack/providers/remote/inference/sambanova/__init__.py
@@ -7,7 +7,6 @@
 from pydantic import BaseModel
 
 from .config import SambaNovaImplConfig
-from .sambanova import SambaNovaInferenceAdapter
 
 
 class SambaNovaProviderDataValidator(BaseModel):
@@ -15,6 +14,8 @@ class SambaNovaProviderDataValidator(BaseModel):
 
 
 async def get_adapter_impl(config: SambaNovaImplConfig, _deps):
+    from .sambanova import SambaNovaInferenceAdapter
+
     assert isinstance(config, SambaNovaImplConfig), f"Unexpected config type: {type(config)}"
     impl = SambaNovaInferenceAdapter(config)
     await impl.initialize()
llama_stack/providers/remote/inference/sambanova/models.py (new file, +49)
@@ -0,0 +1,49 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "Meta-Llama-3.1-8B-Instruct",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "Meta-Llama-3.1-70B-Instruct",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "Meta-Llama-3.1-405B-Instruct",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "Meta-Llama-3.2-1B-Instruct",
+        CoreModelId.llama3_2_1b_instruct.value,
+    ),
+    build_model_alias(
+        "Meta-Llama-3.2-3B-Instruct",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "Meta-Llama-3.3-70B-Instruct",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "Llama-3.2-11B-Vision-Instruct",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "Llama-3.2-90B-Vision-Instruct",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "Meta-Llama-Guard-3-8B",
+        CoreModelId.llama_guard_3_8b.value,
+    ),
+]
llama_stack/providers/remote/inference/sambanova/sambanova.py
@@ -18,14 +18,12 @@ from llama_stack.apis.common.content_types import (
 )
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.models.llama.datatypes import (
-    CoreModelId,
     GreedySamplingStrategy,
     TopKSamplingStrategy,
     TopPSamplingStrategy,
 )
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     process_chat_completion_stream_response,
@@ -35,45 +33,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import SambaNovaImplConfig
-
-MODEL_ALIASES = [
-    build_model_alias(
-        "Meta-Llama-3.1-8B-Instruct",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "Meta-Llama-3.1-70B-Instruct",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "Meta-Llama-3.1-405B-Instruct",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_model_alias(
-        "Meta-Llama-3.2-1B-Instruct",
-        CoreModelId.llama3_2_1b_instruct.value,
-    ),
-    build_model_alias(
-        "Meta-Llama-3.2-3B-Instruct",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_model_alias(
-        "Meta-Llama-3.3-70B-Instruct",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-    build_model_alias(
-        "Llama-3.2-11B-Vision-Instruct",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "Llama-3.2-90B-Vision-Instruct",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "Meta-Llama-Guard-3-8B",
-        CoreModelId.llama_guard_3_8b.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
llama_stack/providers/remote/inference/tgi/__init__.py
@@ -7,13 +7,14 @@
 from typing import Union
 
 from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
-from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter
 
 
 async def get_adapter_impl(
     config: Union[InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig],
     _deps,
 ):
+    from .tgi import InferenceAPIAdapter, InferenceEndpointAdapter, TGIAdapter
+
     if isinstance(config, TGIImplConfig):
         impl = TGIAdapter()
     elif isinstance(config, InferenceAPIImplConfig):
llama_stack/providers/remote/inference/together/models.py (new file, +49)
@@ -0,0 +1,49 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.models.llama.datatypes import CoreModelId
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+)
+
+MODEL_ALIASES = [
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+        CoreModelId.llama3_1_8b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.2-3B-Instruct-Turbo",
+        CoreModelId.llama3_2_3b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+        CoreModelId.llama3_2_11b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
+        CoreModelId.llama3_2_90b_vision_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
+    build_model_alias(
+        "meta-llama/Meta-Llama-Guard-3-8B",
+        CoreModelId.llama_guard_3_8b.value,
+    ),
+    build_model_alias(
+        "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
+        CoreModelId.llama_guard_3_11b_vision.value,
+    ),
+]
llama_stack/providers/remote/inference/together/together.py
@@ -28,10 +28,8 @@ from llama_stack.apis.inference import (
     ToolPromptFormat,
 )
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
-from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     convert_message_to_openai_dict,
@@ -50,45 +48,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import TogetherImplConfig
-
-MODEL_ALIASES = [
-    build_model_alias(
-        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-        CoreModelId.llama3_1_8b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-        CoreModelId.llama3_1_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
-        CoreModelId.llama3_1_405b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.2-3B-Instruct-Turbo",
-        CoreModelId.llama3_2_3b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-3.3-70B-Instruct-Turbo",
-        CoreModelId.llama3_3_70b_instruct.value,
-    ),
-    build_model_alias(
-        "meta-llama/Meta-Llama-Guard-3-8B",
-        CoreModelId.llama_guard_3_8b.value,
-    ),
-    build_model_alias(
-        "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
-        CoreModelId.llama_guard_3_11b_vision.value,
-    ),
-]
+from .models import MODEL_ALIASES
 
 
 class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
@@ -7,7 +7,6 @@
 from pydantic import BaseModel
 
 from .config import ModelContextProtocolConfig
-from .model_context_protocol import ModelContextProtocolToolRuntimeImpl
 
 
 class ModelContextProtocolToolProviderDataValidator(BaseModel):
@@ -15,6 +14,8 @@ class ModelContextProtocolToolProviderDataValidator(BaseModel):
 
 
 async def get_adapter_impl(config: ModelContextProtocolConfig, _deps):
+    from .model_context_protocol import ModelContextProtocolToolRuntimeImpl
+
     impl = ModelContextProtocolToolRuntimeImpl(config)
     await impl.initialize()
     return impl
llama_stack/scripts/distro_codegen.py
@@ -23,6 +23,22 @@ from llama_stack.distribution.build import (
 REPO_ROOT = Path(__file__).parent.parent.parent
 
 
+class ChangedPathTracker:
+    """Track a list of paths we may have changed."""
+
+    def __init__(self):
+        self._changed_paths = []
+
+    def add_paths(self, *paths):
+        for path in paths:
+            path = str(path)
+            if path not in self._changed_paths:
+                self._changed_paths.append(path)
+
+    def changed_paths(self):
+        return self._changed_paths
+
+
 def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
     """Find immediate subdirectories in the templates folder."""
     if not templates_dir.exists():
@@ -31,7 +47,7 @@ def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
     return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
 
 
-def process_template(template_dir: Path, progress) -> None:
+def process_template(template_dir: Path, progress, change_tracker: ChangedPathTracker) -> None:
     """Process a single template directory."""
    progress.print(f"Processing {template_dir.name}")
 
@@ -44,9 +60,12 @@ def process_template(template_dir: Path, progress) -> None:
         if template_func := getattr(module, "get_distribution_template", None):
             template = template_func()
 
+            yaml_output_dir = REPO_ROOT / "llama_stack" / "templates" / template.name
+            doc_output_dir = REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro"
+            change_tracker.add_paths(yaml_output_dir, doc_output_dir)
             template.save_distribution(
-                yaml_output_dir=REPO_ROOT / "llama_stack" / "templates" / template.name,
-                doc_output_dir=REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro",
+                yaml_output_dir=yaml_output_dir,
+                doc_output_dir=doc_output_dir,
             )
         else:
             progress.print(f"[yellow]Warning: {template_dir.name} has no get_distribution_template function")
@@ -56,14 +75,19 @@ def process_template(template_dir: Path, progress) -> None:
         raise e
 
 
-def check_for_changes() -> bool:
+def check_for_changes(change_tracker: ChangedPathTracker) -> bool:
     """Check if there are any uncommitted changes."""
-    result = subprocess.run(
-        ["git", "diff", "--exit-code"],
-        cwd=REPO_ROOT,
-        capture_output=True,
-    )
-    return result.returncode != 0
+    has_changes = False
+    for path in change_tracker.changed_paths():
+        result = subprocess.run(
+            ["git", "diff", "--exit-code", path],
+            cwd=REPO_ROOT,
+            capture_output=True,
+        )
+        if result.returncode != 0:
+            print(f"Change detected in '{path}'.", file=sys.stderr)
+            has_changes = True
+    return has_changes
 
 
 def collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]:
@@ -83,7 +107,7 @@ def collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]:
     return None, []
 
 
-def generate_dependencies_file():
+def generate_dependencies_file(change_tracker: ChangedPathTracker):
     templates_dir = REPO_ROOT / "llama_stack" / "templates"
     distribution_deps = {}
 
@@ -93,12 +117,14 @@ def generate_dependencies_file():
         distribution_deps[name] = deps
 
     deps_file = REPO_ROOT / "distributions" / "dependencies.json"
+    change_tracker.add_paths(deps_file)
     with open(deps_file, "w") as f:
         f.write(json.dumps(distribution_deps, indent=2) + "\n")
 
 
 def main():
     templates_dir = REPO_ROOT / "llama_stack" / "templates"
+    change_tracker = ChangedPathTracker()
 
     with Progress(
         SpinnerColumn(),
@@ -108,7 +134,7 @@ def main():
         task = progress.add_task("Processing distribution templates...", total=len(template_dirs))
 
         # Create a partial function with the progress bar
-        process_func = partial(process_template, progress=progress)
+        process_func = partial(process_template, progress=progress, change_tracker=change_tracker)
 
         # Process templates in parallel
         with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -116,9 +142,9 @@ def main():
             list(executor.map(process_func, template_dirs))
             progress.update(task, advance=len(template_dirs))
 
-    generate_dependencies_file()
+    generate_dependencies_file(change_tracker)
 
-    if check_for_changes():
+    if check_for_changes(change_tracker):
         print(
             "Distribution template changes detected. Please commit the changes.",
             file=sys.stderr,
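The net effect of ChangedPathTracker is that the dirty-check is scoped to the paths the generator actually wrote. A standalone sketch of the same idea (the function name is hypothetical; the git invocation mirrors the diff above):

import subprocess
import sys


def paths_have_uncommitted_changes(paths: list[str], repo_root: str) -> bool:
    """Return True if any of the given paths has uncommitted changes."""
    dirty = False
    for path in paths:
        # `git diff --exit-code <path>` exits non-zero when that path differs
        # from the index, without scanning the rest of the working tree.
        result = subprocess.run(
            ["git", "diff", "--exit-code", path],
            cwd=repo_root,
            capture_output=True,
        )
        if result.returncode != 0:
            print(f"Change detected in '{path}'.", file=sys.stderr)
            dirty = True
    return dirty

Unrelated unstaged changes elsewhere in the tree no longer trip the check, which is exactly the failure mode the commit message describes.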
llama_stack/templates/bedrock/bedrock.py
@@ -10,7 +10,7 @@ from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
-from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES
+from llama_stack.providers.remote.inference.bedrock.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
llama_stack/templates/cerebras/cerebras.py
@@ -14,7 +14,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
-from llama_stack.providers.remote.inference.cerebras.cerebras import model_aliases
+from llama_stack.providers.remote.inference.cerebras.models import model_aliases
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
llama_stack/templates/fireworks/fireworks.py
@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
-from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
+from llama_stack.providers.remote.inference.fireworks.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
llama_stack/templates/nvidia/nvidia.py
@@ -9,7 +9,7 @@ from pathlib
 from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
-from llama_stack.providers.remote.inference.nvidia.nvidia import _MODEL_ALIASES
+from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
llama_stack/templates/ollama/build.yaml
@@ -6,7 +6,6 @@ distribution_spec:
     - remote::ollama
   vector_io:
     - inline::faiss
-    - inline::sqlite_vec
     - remote::chromadb
     - remote::pgvector
   safety:
Ollama distribution run config (generated)
@@ -20,6 +20,13 @@ providers:
     provider_type: inline::sentence-transformers
     config: {}
   vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
   - provider_id: faiss
     provider_type: inline::faiss
     config:
Ollama distribution run config (generated; removes a duplicated line)
@@ -34,7 +34,6 @@ providers:
       type: sqlite
       namespace: null
       db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/sqlite_vec.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
llama_stack/templates/sambanova/sambanova.py
@@ -14,7 +14,7 @@ from llama_stack.distribution.datatypes import (
 )
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
-from llama_stack.providers.remote.inference.sambanova.sambanova import MODEL_ALIASES
+from llama_stack.providers.remote.inference.sambanova.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
llama_stack/templates/together/together.py
@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
-from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
+from llama_stack.providers.remote.inference.together.models import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings