diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py
index 790ae2b20..9acb244bd 100644
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@@ -46,6 +46,7 @@ def get_distribution_template() -> DistributionTemplate:
         ModelInput(
             model_id=core_model_to_hf_repo[m.llama_model],
             provider_model_id=m.provider_model_id,
+            provider_id="cerebras",
         )
         for m in model_aliases
     ]
@@ -54,7 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dim": 384,
+            "embedding_dimension": 384,
         },
     )

diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml
index 8922cdc43..b7c2d316e 100644
--- a/llama_stack/templates/cerebras/run.yaml
+++ b/llama_stack/templates/cerebras/run.yaml
@@ -52,16 +52,16 @@ metadata_store:
 models:
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
-  provider_id: null
+  provider_id: cerebras
   provider_model_id: llama3.1-8b
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: null
+  provider_id: cerebras
   provider_model_id: llama3.1-70b
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py
index a5e5ff0ba..cbcac0f92 100644
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@@ -57,6 +57,7 @@ def get_distribution_template() -> DistributionTemplate:
         ModelInput(
             model_id=core_model_to_hf_repo[m.llama_model],
             provider_model_id=m.provider_model_id,
+            provider_id="fireworks",
         )
         for m in MODEL_ALIASES
     ]
@@ -65,7 +66,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dim": 384,
+            "embedding_dimension": 384,
         },
     )

diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml
index 084335870..cb31b4678 100644
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@@ -77,51 +77,51 @@ metadata_store:
 models:
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
-  provider_id: null
+  provider_id: fireworks
   provider_model_id: fireworks/llama-v3p1-8b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: null
+  provider_id: fireworks
   provider_model_id: fireworks/llama-v3p1-70b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
-  provider_id: null
+  provider_id: fireworks
   provider_model_id: fireworks/llama-v3p1-405b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: null
+  provider_id: fireworks
   provider_model_id: fireworks/llama-v3p2-1b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
-  provider_id: null
+  provider_id: fireworks
   provider_model_id: fireworks/llama-v3p2-3b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
-  provider_id: null
+  provider_id: fireworks
   provider_model_id: fireworks/llama-v3p2-11b-vision-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
-  provider_id: null
+  provider_id: fireworks
   provider_model_id: fireworks/llama-v3p2-90b-vision-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
+  provider_id: fireworks
   provider_model_id: fireworks/llama-guard-3-8b
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
-  provider_id: null
+  provider_id: fireworks
   provider_model_id: fireworks/llama-guard-3-11b-vision
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py
index b38124116..404440be6 100644
--- a/llama_stack/templates/hf-endpoint/hf_endpoint.py
+++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py
@@ -55,7 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dim": 384,
+            "embedding_dimension": 384,
         },
     )

diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
index 964273cb1..8e566de9a 100644
--- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml
+++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
@@ -91,7 +91,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml
index 9cdbfdff5..c1b3a64d0 100644
--- a/llama_stack/templates/hf-endpoint/run.yaml
+++ b/llama_stack/templates/hf-endpoint/run.yaml
@@ -81,7 +81,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py
index d3b192ef0..63b423412 100644
--- a/llama_stack/templates/hf-serverless/hf_serverless.py
+++ b/llama_stack/templates/hf-serverless/hf_serverless.py
@@ -56,7 +56,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dim": 384,
+            "embedding_dimension": 384,
         },
     )

diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml
index 294256ed5..2b24ab074 100644
--- a/llama_stack/templates/hf-serverless/run-with-safety.yaml
+++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml
@@ -91,7 +91,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml
index 1967e7ec7..394d689da 100644
--- a/llama_stack/templates/hf-serverless/run.yaml
+++ b/llama_stack/templates/hf-serverless/run.yaml
@@ -81,7 +81,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py
index af03ec9ee..461d89a4a 100644
--- a/llama_stack/templates/meta-reference-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py
@@ -59,7 +59,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dim": 384,
+            "embedding_dimension": 384,
         },
     )
     safety_model = ModelInput(
diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
index 603027675..deb6c4a91 100644
--- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
@@ -93,7 +93,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml
index 9b8eeea85..c19066664 100644
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@@ -82,7 +82,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
index 6ea239cb2..f1a1f90cd 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
@@ -62,7 +62,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dim": 384,
+            "embedding_dimension": 384,
         },
     )
     return DistributionTemplate(
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
index ad5ba0184..550170a00 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
@@ -84,7 +84,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index 23b56a958..1e3180a77 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -58,7 +58,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dim": 384,
+            "embedding_dimension": 384,
         },
     )

diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index a17f7b721..100886c95 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -85,7 +85,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index d2b8f0fb3..bcbed3e6e 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -80,7 +80,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index 2c8011829..7097bc649 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -68,7 +68,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index b516bb8d4..c957b05d0 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -57,7 +57,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index 867659fa8..e4c948fbf 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -57,7 +57,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dim": 384,
+            "embedding_dimension": 384,
         },
     )

diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml
index 4f3ceb0f8..22c08d1d3 100644
--- a/llama_stack/templates/tgi/run.yaml
+++ b/llama_stack/templates/tgi/run.yaml
@@ -80,7 +80,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py
index 9b5b1bf37..c84f5b5fe 100644
--- a/llama_stack/templates/tgi/tgi.py
+++ b/llama_stack/templates/tgi/tgi.py
@@ -56,7 +56,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dim": 384,
+            "embedding_dimension": 384,
         },
     )
     safety_model = ModelInput(
diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml
index 8b437fd81..9f02d8b54 100644
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@@ -77,46 +77,46 @@ metadata_store:
 models:
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
-  provider_id: null
+  provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: null
+  provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
-  provider_id: null
+  provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
-  provider_id: null
+  provider_id: together
   provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
-  provider_id: null
+  provider_id: together
   provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
-  provider_id: null
+  provider_id: together
   provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
+  provider_id: together
   provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
-  provider_id: null
+  provider_id: together
   provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py
index 8c1da309c..994cf5549 100644
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@@ -55,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
         ModelInput(
             model_id=core_model_to_hf_repo[m.llama_model],
             provider_model_id=m.provider_model_id,
+            provider_id="together",
         )
         for m in MODEL_ALIASES
     ]
@@ -63,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dim": 384,
+            "embedding_dimension": 384,
         },
     )

diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml
index 0cbfd2b09..171f25d63 100644
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@@ -84,7 +84,7 @@ models:
   provider_model_id: null
   model_type: llm
 - metadata:
-    embedding_dim: 384
+    embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
   provider_model_id: null
diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py
index 54a8bf747..fe6fb7186 100644
--- a/llama_stack/templates/vllm-gpu/vllm.py
+++ b/llama_stack/templates/vllm-gpu/vllm.py
@@ -51,7 +51,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="sentence-transformers",
         model_type=ModelType.embedding,
         metadata={
-            "embedding_dim": 384,
+            "embedding_dimension": 384,
         },
     )