fix: register provider model name and HF alias in run.yaml

Ashwin Bharambe 2025-02-27 12:03:43 -08:00
parent 4780223544
commit 9f9278f9a8
48 changed files with 597 additions and 220 deletions


@@ -27,19 +27,19 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3-8B-Instruct (meta/llama3-8b-instruct)`
-- `meta-llama/Llama-3-70B-Instruct (meta/llama3-70b-instruct)`
-- `meta-llama/Llama-3.1-8B-Instruct (meta/llama-3.1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta/llama-3.1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta/llama-3.1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (meta/llama-3.2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)`
-- `nvidia/llama-3.2-nv-embedqa-1b-v2 (nvidia/llama-3.2-nv-embedqa-1b-v2)`
-- `nvidia/nv-embedqa-e5-v5 (nvidia/nv-embedqa-e5-v5)`
-- `nvidia/nv-embedqa-mistral-7b-v2 (nvidia/nv-embedqa-mistral-7b-v2)`
-- `snowflake/arctic-embed-l (snowflake/arctic-embed-l)`
+- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
+- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `nvidia/llama-3.2-nv-embedqa-1b-v2 `
+- `nvidia/nv-embedqa-e5-v5 `
+- `nvidia/nv-embedqa-mistral-7b-v2 `
+- `snowflake/arctic-embed-l `
 ### Prerequisite: API Keys
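
Net effect for clients: each provider model is now registered under its provider-native name, with the HF repo id kept as an alias, so either id resolves to the same model. A minimal sketch with llama-stack-client (the base URL/port is an assumption for a locally running stack):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # port is an assumption

# Both ids below should resolve to the same NVIDIA-served model once the
# alias registration from this commit is in place.
for model_id in ["meta/llama3-8b-instruct", "meta-llama/Llama-3-8B-Instruct"]:
    response = client.inference.chat_completion(
        model_id=model_id,
        messages=[{"role": "user", "content": "Say hello."}],
    )
    print(model_id, "->", response.completion_message.content)
```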


@@ -34,9 +34,9 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (meta.llama3-1-8b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta.llama3-1-70b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta.llama3-1-405b-instruct-v1:0)`
+- `meta.llama3-1-8b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta.llama3-1-70b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta.llama3-1-405b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
 ### Prerequisite: API Keys


@@ -27,8 +27,8 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (llama3.1-8b)`
-- `meta-llama/Llama-3.3-70B-Instruct (llama-3.3-70b)`
+- `llama3.1-8b (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `llama-3.3-70b (aliases: meta-llama/Llama-3.3-70B-Instruct)`
 ### Prerequisite: API Keys


@@ -37,17 +37,17 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (accounts/fireworks/models/llama-v3p1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (accounts/fireworks/models/llama-v3p1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (accounts/fireworks/models/llama-v3p1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (accounts/fireworks/models/llama-v3p2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (accounts/fireworks/models/llama-v3p2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-90b-vision-instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (accounts/fireworks/models/llama-v3p3-70b-instruct)`
-- `meta-llama/Llama-Guard-3-8B (accounts/fireworks/models/llama-guard-3-8b)`
-- `meta-llama/Llama-Guard-3-11B-Vision (accounts/fireworks/models/llama-guard-3-11b-vision)`
-- `nomic-ai/nomic-embed-text-v1.5 (nomic-ai/nomic-embed-text-v1.5)`
+- `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `accounts/fireworks/models/llama-guard-3-8b (aliases: meta-llama/Llama-Guard-3-8B)`
+- `accounts/fireworks/models/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `nomic-ai/nomic-embed-text-v1.5 `
 ### Prerequisite: API Keys


@@ -37,11 +37,11 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama3-8b-8192)`
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama-3.1-8b-instant)`
-- `meta-llama/Llama-3-70B-Instruct (groq/llama3-70b-8192)`
-- `meta-llama/Llama-3.3-70B-Instruct (groq/llama-3.3-70b-versatile)`
-- `meta-llama/Llama-3.2-3B-Instruct (groq/llama-3.2-3b-preview)`
+- `groq/llama3-8b-8192 `
+- `groq/llama-3.1-8b-instant `
+- `groq/llama3-70b-8192 `
+- `groq/llama-3.3-70b-versatile `
+- `groq/llama-3.2-3b-preview `
 ### Prerequisite: API Keys


@@ -34,15 +34,15 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (Meta-Llama-3.3-70B-Instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
-- `meta-llama/Llama-Guard-3-8B (Meta-Llama-Guard-3-8B)`
+- `Meta-Llama-3.1-8B-Instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `Meta-Llama-3.1-70B-Instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `Meta-Llama-3.1-405B-Instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `Meta-Llama-3.2-1B-Instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `Meta-Llama-3.2-3B-Instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `Meta-Llama-3.3-70B-Instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
 ### Prerequisite: API Keys


@@ -37,17 +37,17 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
-- `meta-llama/Llama-3.3-70B-Instruct`
-- `meta-llama/Llama-Guard-3-8B`
-- `meta-llama/Llama-Guard-3-11B-Vision`
-- `togethercomputer/m2-bert-80M-8k-retrieval`
-- `togethercomputer/m2-bert-80M-32k-retrieval`
+- `meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta-llama/Llama-3.2-3B-Instruct-Turbo (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
+- `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `togethercomputer/m2-bert-80M-8k-retrieval `
+- `togethercomputer/m2-bert-80M-32k-retrieval `
 ### Prerequisite: API Keys


@@ -46,14 +46,14 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 from .config import CerebrasImplConfig
-from .models import model_entries
+from .models import MODEL_ENTRIES
 
 
 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: CerebrasImplConfig) -> None:
         ModelRegistryHelper.__init__(
             self,
-            model_entries=model_entries,
+            model_entries=MODEL_ENTRIES,
         )
         self.config = config
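
ModelRegistryHelper is what resolves an incoming model id (native name or alias) to the id the provider backend expects. Its internals are not part of this diff; a rough sketch of the presumed lookup, with attribute names assumed rather than confirmed:

```python
class ModelRegistryHelperSketch:
    """Illustrative only: index every known id, native or alias, to the
    provider-native model id the backend expects."""

    def __init__(self, model_entries) -> None:
        self.alias_to_provider_id = {}
        for entry in model_entries:
            # entry.provider_model_id / entry.aliases are assumed field names.
            for model_id in [entry.provider_model_id, *entry.aliases]:
                self.alias_to_provider_id[model_id] = entry.provider_model_id

    def get_provider_model_id(self, model_id: str) -> str:
        return self.alias_to_provider_id[model_id]
```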


@@ -9,7 +9,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )
 
-model_entries = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "llama3.1-8b",
         CoreModelId.llama3_1_8b_instruct.value,
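
For reference, an entry built by build_hf_repo_model_entry pairs a provider-native name ("llama3.1-8b") with the canonical Llama model it serves, carrying the HF repo id as an alias. A self-contained sketch of the shape involved (class and field names are assumptions inferred from how the entries are consumed in this diff):

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List


@dataclass
class ModelEntrySketch:
    provider_model_id: str  # e.g. "llama3.1-8b", Cerebras' native name
    llama_model: str        # e.g. CoreModelId.llama3_1_8b_instruct.value
    # HF repo ids that should resolve to the same model,
    # e.g. ["meta-llama/Llama-3.1-8B-Instruct"].
    aliases: List[str] = field(default_factory=list)
    model_type: str = "llm"
    metadata: Dict[str, Any] = field(default_factory=dict)
```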


@@ -11,7 +11,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )
 
-_MODEL_ENTRIES = [
+MODEL_ENTRIES = [
    build_hf_repo_model_entry(
        "meta/llama3-8b-instruct",
        CoreModelId.llama3_8b_instruct.value,


@@ -47,7 +47,7 @@ from llama_stack.providers.utils.inference.openai_compat import (
 from llama_stack.providers.utils.inference.prompt_adapter import content_has_media
 
 from . import NVIDIAConfig
-from .models import _MODEL_ENTRIES
+from .models import MODEL_ENTRIES
 from .openai_utils import (
     convert_chat_completion_request,
     convert_completion_request,
@@ -62,7 +62,7 @@ logger = logging.getLogger(__name__)
 class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: NVIDIAConfig) -> None:
         # TODO(mf): filter by available models
-        ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES)
+        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
 
         logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")


@@ -6,12 +6,10 @@
 from pathlib import Path
 
-from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -39,16 +37,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id="bedrock",
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "bedrock": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -71,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
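
get_model_registry, imported above, replaces the per-template ModelInput loops. Its implementation is not shown in this diff; a minimal sketch consistent with its call sites and with the run.yaml output further down, reusing the assumed ModelEntrySketch shape from above:

```python
from typing import Dict, List

from llama_stack.distribution.datatypes import ModelInput


def get_model_registry(
    available_models: Dict[str, List[ModelEntrySketch]],
) -> List[ModelInput]:
    models = []
    for provider_id, entries in available_models.items():
        for entry in entries:
            # Register the provider-native name first, then each alias, all
            # pointing at the same provider_model_id.
            for model_id in [entry.provider_model_id, *entry.aliases]:
                models.append(
                    ModelInput(
                        model_id=model_id,
                        provider_model_id=entry.provider_model_id,
                        provider_id=provider_id,
                        model_type=entry.model_type,
                        metadata=entry.metadata,
                    )
                )
    return models
```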


@@ -28,7 +28,7 @@ The following environment variables can be configured:
 The following models are available by default:
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
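
model.doc_string is new here; a plausible rendering rule consistent with the README output above, where alias-less entries get an empty suffix (hence the bare trailing space in some rendered lines). This property is an assumption, not shown in the diff:

```python
def doc_string(aliases: list[str]) -> str:
    # Empty for provider-only entries; otherwise "(aliases: ...)".
    return f"(aliases: {', '.join(aliases)})" if aliases else ""


print(doc_string(["meta-llama/Llama-3.1-8B-Instruct"]))
# (aliases: meta-llama/Llama-3.1-8B-Instruct)
print(repr(doc_string([])))
# ''
```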


@@ -88,16 +88,31 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
 models:
+- metadata: {}
+  model_id: meta.llama3-1-8b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-8b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: bedrock
   provider_model_id: meta.llama3-1-8b-instruct-v1:0
   model_type: llm
+- metadata: {}
+  model_id: meta.llama3-1-70b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-70b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: bedrock
   provider_model_id: meta.llama3-1-70b-instruct-v1:0
   model_type: llm
+- metadata: {}
+  model_id: meta.llama3-1-405b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-405b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: bedrock
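
With run.yaml carrying both registrations, listing models on a running bedrock distribution should show each Bedrock model twice, under its native name and under its HF alias, both pointing at the same provider model id. A hedged sketch (attribute names as exposed by llama-stack-client; the base URL is an assumption):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")
for m in client.models.list():
    # Expect pairs like:
    #   meta.llama3-1-8b-instruct-v1:0    -> meta.llama3-1-8b-instruct-v1:0
    #   meta-llama/Llama-3.1-8B-Instruct  -> meta.llama3-1-8b-instruct-v1:0
    print(f"{m.identifier:40s} -> {m.provider_resource_id}")
```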


@@ -8,14 +8,13 @@ from pathlib import Path
 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
-from llama_stack.providers.remote.inference.cerebras.models import model_entries
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -48,15 +47,10 @@ def get_distribution_template() -> DistributionTemplate:
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id="cerebras",
-        )
-        for m in model_entries
-    ]
+    available_models = {
+        "cerebras": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -92,7 +86,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -20,7 +20,7 @@ The following environment variables can be configured:
 The following models are available by default:
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -90,11 +90,21 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db
 models:
+- metadata: {}
+  model_id: llama3.1-8b
+  provider_id: cerebras
+  provider_model_id: llama3.1-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: cerebras
   provider_model_id: llama3.1-8b
   model_type: llm
+- metadata: {}
+  model_id: llama-3.3-70b
+  provider_id: cerebras
+  provider_model_id: llama-3.3-70b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: cerebras


@@ -12,14 +12,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -71,16 +70,10 @@ def get_distribution_template() -> DistributionTemplate:
             provider_id="code-interpreter",
         ),
     ]
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_id="fireworks",
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "fireworks": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -97,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=default_models + [embedding_model],
+        available_models_by_provider=available_models,
     run_configs={
         "run.yaml": RunConfigSettings(
             provider_overrides={


@@ -90,51 +90,112 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
   model_type: llm
 - metadata:
     embedding_dimension: 768
     context_length: 8192
   model_id: nomic-ai/nomic-embed-text-v1.5
   provider_id: fireworks
+  provider_model_id: nomic-ai/nomic-embed-text-v1.5
   model_type: embedding
 - metadata:
     embedding_dimension: 384


@@ -3,7 +3,6 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from pathlib import Path
 
 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import (
@@ -99,9 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
         distro_type="self_hosted",
         description="Dell's distribution of Llama Stack. TGI inference via Dell's custom container",
         container_image=None,
-        template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, embedding_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -13,7 +13,6 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -28,7 +27,7 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES as GROQ_MODEL_ENTRIES
 from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
 from llama_stack.providers.remote.inference.openai.models import MODEL_ENTRIES as OPENAI_MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
@@ -61,8 +60,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
         ),
     ]
     inference_providers = []
-    default_models = []
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
+    available_models = {}
     for provider_id, model_entries, config in providers:
         inference_providers.append(
             Provider(
@@ -71,21 +69,12 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
                 config=config,
             )
         )
-        default_models.extend(
-            ModelInput(
-                model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-                provider_model_id=m.provider_model_id,
-                provider_id=provider_id,
-                model_type=m.model_type,
-                metadata=m.metadata,
-            )
-            for m in model_entries
-        )
-    return inference_providers, default_models
+        available_models[provider_id] = model_entries
+    return inference_providers, available_models
 
 def get_distribution_template() -> DistributionTemplate:
-    inference_providers, default_models = get_inference_providers()
+    inference_providers, available_models = get_inference_providers()
     providers = {
         "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
         "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
@@ -139,6 +128,7 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )
 
+    default_models = get_model_registry(available_models)
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
@@ -146,7 +136,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[],
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -136,51 +136,101 @@ models:
   provider_id: openai
   provider_model_id: openai/text-embedding-3-large
   model_type: embedding
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
@@ -247,27 +297,27 @@ models:
   provider_model_id: gemini/text-embedding-004
   model_type: embedding
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama3-8b-8192
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama-3.1-8b-instant
   provider_id: groq
   provider_model_id: groq/llama-3.1-8b-instant
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3-70B-Instruct
+  model_id: groq/llama3-70b-8192
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.3-70B-Instruct
+  model_id: groq/llama-3.3-70b-versatile
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.2-3B-Instruct
+  model_id: groq/llama-3.2-3b-preview
   provider_id: groq
   provider_model_id: groq/llama-3.2-3b-preview
   model_type: llm


@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -60,17 +59,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="fireworks",
-            metadata=m.metadata,
-            model_type=m.model_type,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "fireworks": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -101,7 +94,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -99,51 +99,101 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks


@@ -93,51 +93,101 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks


@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -12,13 +12,12 @@ from llama_stack.distribution.datatypes import (
     Provider,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.inference.groq import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -60,18 +59,10 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id=name,
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "groq": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -94,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate:
         docker_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -91,27 +91,27 @@ metadata_store:
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db
 models:
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama3-8b-8192
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama-3.1-8b-instant
   provider_id: groq
   provider_model_id: groq/llama-3.1-8b-instant
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3-70B-Instruct
+  model_id: groq/llama3-70b-8192
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.3-70B-Instruct
+  model_id: groq/llama-3.3-70b-versatile
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.2-3B-Instruct
+  model_id: groq/llama-3.2-3b-preview
   provider_id: groq
   provider_model_id: groq/llama-3.2-3b-preview
   model_type: llm

@@ -92,7 +92,6 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=None, template_path=None,
providers=providers, providers=providers,
default_models=[inference_model, safety_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -93,7 +93,6 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=None, template_path=None,
providers=providers, providers=providers,
default_models=[inference_model, safety_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -98,7 +98,6 @@ def get_distribution_template() -> DistributionTemplate:
description="Use Meta Reference for running LLM inference", description="Use Meta Reference for running LLM inference",
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=[inference_model, safety_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate:
description="Use Meta Reference with fp8, int4 quantization for running LLM inference", description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=[inference_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -20,7 +20,7 @@ The following environment variables can be configured:
The following models are available by default: The following models are available by default:
{% for model in default_models %} {% for model in default_models %}
- `{{ model.model_id }} ({{ model.provider_model_id }})` - `{{ model.model_id }} {{ model.doc_string }}`
{% endfor %} {% endfor %}
{% endif %} {% endif %}
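The rendered effect of this one-line template change is easy to check in isolation. A small sketch using Jinja2 directly; the example values are transcribed from the groq run.yaml above:

```python
from jinja2 import Template

bullet = Template("- `{{ model.model_id }} {{ model.doc_string }}`")
print(bullet.render(model={
    "model_id": "groq/llama-3.3-70b-versatile",
    "doc_string": "(aliases: meta-llama/Llama-3.3-70B-Instruct)",
}))
# Output: - `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
```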

@@ -6,11 +6,10 @@
from pathlib import Path from pathlib import Path
from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.distribution.datatypes import Provider, ToolGroupInput
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ENTRIES from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
@@ -38,17 +37,9 @@ def get_distribution_template() -> DistributionTemplate:
config=NVIDIAConfig.sample_run_config(), config=NVIDIAConfig.sample_run_config(),
) )
core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} available_models = {
default_models = [ "nvidia": MODEL_ENTRIES,
ModelInput( }
model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
provider_model_id=m.provider_model_id,
provider_id="nvidia",
model_type=m.model_type,
metadata=m.metadata,
)
for m in _MODEL_ENTRIES
]
default_tool_groups = [ default_tool_groups = [
ToolGroupInput( ToolGroupInput(
toolgroup_id="builtin::websearch", toolgroup_id="builtin::websearch",
@@ -64,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
), ),
] ]
default_models = get_model_registry(available_models)
return DistributionTemplate( return DistributionTemplate(
name="nvidia", name="nvidia",
distro_type="remote_hosted", distro_type="remote_hosted",
@@ -71,7 +63,7 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=default_models, available_models_by_provider=available_models,
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -90,46 +90,91 @@ metadata_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
models: models:
- metadata: {}
model_id: meta/llama3-8b-instruct
provider_id: nvidia
provider_model_id: meta/llama3-8b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3-8B-Instruct model_id: meta-llama/Llama-3-8B-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama3-8b-instruct provider_model_id: meta/llama3-8b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama3-70b-instruct
provider_id: nvidia
provider_model_id: meta/llama3-70b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3-70B-Instruct model_id: meta-llama/Llama-3-70B-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama3-70b-instruct provider_model_id: meta/llama3-70b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.1-8b-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.1-8b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama-3.1-8b-instruct provider_model_id: meta/llama-3.1-8b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.1-70b-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.1-70b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama-3.1-70b-instruct provider_model_id: meta/llama-3.1-70b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.1-405b-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.1-405b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama-3.1-405b-instruct provider_model_id: meta/llama-3.1-405b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.2-1b-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.2-1b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct model_id: meta-llama/Llama-3.2-1B-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama-3.2-1b-instruct provider_model_id: meta/llama-3.2-1b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.2-3b-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.2-3b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama-3.2-3b-instruct provider_model_id: meta/llama-3.2-3b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.2-11b-vision-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.2-11b-vision-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama-3.2-11b-vision-instruct provider_model_id: meta/llama-3.2-11b-vision-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.2-90b-vision-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.2-90b-vision-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: nvidia provider_id: nvidia
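The run.yaml above shows the net effect of the registry change: each NVIDIA model now has two rows, one keyed by the provider-native id and one by its HF alias, both pointing at the same `provider_model_id`. A pure-Python sketch of the resulting lookup, with the dict transcribed from two of the YAML entries:

```python
# Either identifier resolves to the id actually sent to the NVIDIA endpoint.
registry = {
    "meta/llama-3.2-1b-instruct": "meta/llama-3.2-1b-instruct",
    "meta-llama/Llama-3.2-1B-Instruct": "meta/llama-3.2-1b-instruct",
}

def resolve(model_id: str) -> str:
    return registry[model_id]

assert resolve("meta-llama/Llama-3.2-1B-Instruct") == resolve("meta/llama-3.2-1b-instruct")
```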

@@ -87,7 +87,6 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=[inference_model, safety_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -95,7 +95,6 @@ def get_distribution_template() -> DistributionTemplate:
description="Use (an external) vLLM server for running LLM inference", description="Use (an external) vLLM server for running LLM inference",
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=[inference_model, safety_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -30,7 +30,7 @@ The following environment variables can be configured:
The following models are available by default: The following models are available by default:
{% for model in default_models %} {% for model in default_models %}
- `{{ model.model_id }} ({{ model.provider_model_id }})` - `{{ model.model_id }} {{ model.doc_string }}`
{% endfor %} {% endfor %}
{% endif %} {% endif %}

@@ -68,46 +68,91 @@ metadata_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
models: models:
- metadata: {}
model_id: Meta-Llama-3.1-8B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.1-8B-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Meta-Llama-3.1-8B-Instruct provider_model_id: Meta-Llama-3.1-8B-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Meta-Llama-3.1-70B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.1-70B-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Meta-Llama-3.1-70B-Instruct provider_model_id: Meta-Llama-3.1-70B-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Meta-Llama-3.1-405B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.1-405B-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: sambanova provider_id: sambanova
provider_model_id: Meta-Llama-3.1-405B-Instruct provider_model_id: Meta-Llama-3.1-405B-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Meta-Llama-3.2-1B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.2-1B-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct model_id: meta-llama/Llama-3.2-1B-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Meta-Llama-3.2-1B-Instruct provider_model_id: Meta-Llama-3.2-1B-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Meta-Llama-3.2-3B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.2-3B-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Meta-Llama-3.2-3B-Instruct provider_model_id: Meta-Llama-3.2-3B-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Meta-Llama-3.3-70B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.3-70B-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Meta-Llama-3.3-70B-Instruct provider_model_id: Meta-Llama-3.3-70B-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Llama-3.2-11B-Vision-Instruct
provider_id: sambanova
provider_model_id: Llama-3.2-11B-Vision-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Llama-3.2-11B-Vision-Instruct provider_model_id: Llama-3.2-11B-Vision-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Llama-3.2-90B-Vision-Instruct
provider_id: sambanova
provider_model_id: Llama-3.2-90B-Vision-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Llama-3.2-90B-Vision-Instruct provider_model_id: Llama-3.2-90B-Vision-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Meta-Llama-Guard-3-8B
provider_id: sambanova
provider_model_id: Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-Guard-3-8B model_id: meta-llama/Llama-Guard-3-8B
provider_id: sambanova provider_id: sambanova

@@ -7,15 +7,13 @@
from pathlib import Path from pathlib import Path
from llama_stack.distribution.datatypes import ( from llama_stack.distribution.datatypes import (
ModelInput,
Provider, Provider,
ShieldInput, ShieldInput,
ToolGroupInput, ToolGroupInput,
) )
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
@@ -40,16 +38,10 @@ def get_distribution_template() -> DistributionTemplate:
config=SambaNovaImplConfig.sample_run_config(), config=SambaNovaImplConfig.sample_run_config(),
) )
core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} available_models = {
default_models = [ name: MODEL_ENTRIES,
ModelInput( }
model_id=core_model_to_hf_repo[m.llama_model], default_models = get_model_registry(available_models)
provider_model_id=m.provider_model_id,
provider_id=name,
)
for m in MODEL_ENTRIES
]
default_tool_groups = [ default_tool_groups = [
ToolGroupInput( ToolGroupInput(
toolgroup_id="builtin::websearch", toolgroup_id="builtin::websearch",
@@ -72,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
docker_image=None, docker_image=None,
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=default_models, available_models_by_provider=available_models,
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -24,9 +24,33 @@ from llama_stack.distribution.datatypes import (
) )
from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
def get_model_registry(available_models: Dict[str, List[ProviderModelEntry]]) -> List[ModelInput]:
models = []
for provider_id, entries in available_models.items():
for entry in entries:
ids = [entry.provider_model_id] + entry.aliases
for model_id in ids:
models.append(
ModelInput(
model_id=model_id,
provider_model_id=entry.provider_model_id,
provider_id=provider_id,
model_type=entry.model_type,
metadata=entry.metadata,
)
)
return models
class DefaultModel(BaseModel):
model_id: str
doc_string: str
class RunConfigSettings(BaseModel): class RunConfigSettings(BaseModel):
provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict) provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict)
default_models: Optional[List[ModelInput]] = None default_models: Optional[List[ModelInput]] = None
@@ -110,7 +134,7 @@ class DistributionTemplate(BaseModel):
run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None
container_image: Optional[str] = None container_image: Optional[str] = None
default_models: Optional[List[ModelInput]] = None available_models_by_provider: Optional[Dict[str, List[ProviderModelEntry]]] = None
def build_config(self) -> BuildConfig: def build_config(self) -> BuildConfig:
return BuildConfig( return BuildConfig(
@@ -148,13 +172,32 @@ class DistributionTemplate(BaseModel):
autoescape=True, autoescape=True,
) )
template = env.from_string(template) template = env.from_string(template)
default_models = []
if self.available_models_by_provider:
has_multiple_providers = len(self.available_models_by_provider.keys()) > 1
for provider_id, model_entries in self.available_models_by_provider.items():
for model_entry in model_entries:
doc_parts = []
if model_entry.aliases:
doc_parts.append(f"aliases: {', '.join(model_entry.aliases)}")
if has_multiple_providers:
doc_parts.append(f"provider: {provider_id}")
default_models.append(
DefaultModel(
model_id=model_entry.provider_model_id,
doc_string=f"({' -- '.join(doc_parts)})" if doc_parts else "",
)
)
return template.render( return template.render(
name=self.name, name=self.name,
description=self.description, description=self.description,
providers=self.providers, providers=self.providers,
providers_table=providers_table, providers_table=providers_table,
run_config_env_vars=self.run_config_env_vars, run_config_env_vars=self.run_config_env_vars,
default_models=self.default_models, default_models=default_models,
) )
def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None: def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None:
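A worked example of the `doc_string` this render loop builds, assuming a single-provider template (so `has_multiple_providers` is false) and an entry with one alias:

```python
# Mirrors the loop above for one entry with a single alias.
doc_parts = ["aliases: meta-llama/Llama-3.3-70B-Instruct"]
doc_string = f"({' -- '.join(doc_parts)})" if doc_parts else ""
assert doc_string == "(aliases: meta-llama/Llama-3.3-70B-Instruct)"
# With multiple providers the loop also appends "provider: <id>", giving
# "(aliases: ... -- provider: groq)"; an entry with no aliases under a single
# provider gets an empty doc_string and renders as a bare model id.
```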

@@ -96,7 +96,6 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=[inference_model, safety_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -30,7 +30,7 @@ The following environment variables can be configured:
The following models are available by default: The following models are available by default:
{% for model in default_models %} {% for model in default_models %}
- `{{ model.model_id }}` - `{{ model.model_id }} {{ model.doc_string }}`
{% endfor %} {% endfor %}
{% endif %} {% endif %}

@@ -99,46 +99,91 @@ metadata_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
models: models:
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-Guard-3-8B
provider_id: together
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-Guard-3-8B model_id: meta-llama/Llama-Guard-3-8B
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: together provider_id: together

@@ -93,46 +93,91 @@ metadata_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
models: models:
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-Guard-3-8B
provider_id: together
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-Guard-3-8B model_id: meta-llama/Llama-Guard-3-8B
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: together provider_id: together

@@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import (
ShieldInput, ShieldInput,
ToolGroupInput, ToolGroupInput,
) )
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.inline.inference.sentence_transformers import ( from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig, SentenceTransformersInferenceConfig,
) )
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.together import TogetherImplConfig from llama_stack.providers.remote.inference.together import TogetherImplConfig
from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
@@ -57,18 +56,10 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::sentence-transformers", provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(), config=SentenceTransformersInferenceConfig.sample_run_config(),
) )
core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} available_models = {
default_models = [ "together": MODEL_ENTRIES,
ModelInput( }
model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, default_models = get_model_registry(available_models)
provider_model_id=m.provider_model_id,
provider_id="together",
metadata=m.metadata,
model_type=m.model_type,
)
for m in MODEL_ENTRIES
]
default_tool_groups = [ default_tool_groups = [
ToolGroupInput( ToolGroupInput(
toolgroup_id="builtin::websearch", toolgroup_id="builtin::websearch",
@@ -99,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=default_models, available_models_by_provider=available_models,
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=None, template_path=None,
providers=providers, providers=providers,
default_models=[inference_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={