From cba55808abd00ddb7eacda225897bab9e5093362 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 3 Jun 2025 12:10:46 -0700 Subject: [PATCH] feat(distro): add more providers to starter distro, prefix conflicting models (#2362) The name changes to the verifications file are unfortunate, but maybe we don't need that @ehhuang ? Edit: deleted the verifications template now --- .../remote/inference/ollama/models.py | 2 +- .../remote/inference/ollama/ollama.py | 4 +- llama_stack/templates/groq/run.yaml | 16 +- llama_stack/templates/starter/build.yaml | 2 + llama_stack/templates/starter/run.yaml | 391 ++++++++-- llama_stack/templates/starter/starter.py | 18 + llama_stack/templates/template.py | 25 +- .../templates/verification/__init__.py | 7 - llama_stack/templates/verification/build.yaml | 40 - llama_stack/templates/verification/run.yaml | 731 ------------------ .../templates/verification/verification.py | 201 ----- 11 files changed, 400 insertions(+), 1037 deletions(-) delete mode 100644 llama_stack/templates/verification/__init__.py delete mode 100644 llama_stack/templates/verification/build.yaml delete mode 100644 llama_stack/templates/verification/run.yaml delete mode 100644 llama_stack/templates/verification/verification.py diff --git a/llama_stack/providers/remote/inference/ollama/models.py b/llama_stack/providers/remote/inference/ollama/models.py index 42e364105..8f0f0421a 100644 --- a/llama_stack/providers/remote/inference/ollama/models.py +++ b/llama_stack/providers/remote/inference/ollama/models.py @@ -12,7 +12,7 @@ from llama_stack.providers.utils.inference.model_registry import ( build_model_entry, ) -model_entries = [ +MODEL_ENTRIES = [ build_hf_repo_model_entry( "llama3.1:8b-instruct-fp16", CoreModelId.llama3_1_8b_instruct.value, diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index fd02568fe..7415f0eb0 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -78,7 +78,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( request_has_media, ) -from .models import model_entries +from .models import MODEL_ENTRIES logger = get_logger(name=__name__, category="inference") @@ -88,7 +88,7 @@ class OllamaInferenceAdapter( ModelsProtocolPrivate, ): def __init__(self, url: str) -> None: - self.register_helper = ModelRegistryHelper(model_entries) + self.register_helper = ModelRegistryHelper(MODEL_ENTRIES) self.url = url @property diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index f945d00a8..7f1912a6f 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -112,7 +112,7 @@ models: provider_model_id: groq/llama3-8b-8192 model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct + model_id: groq/meta-llama/Llama-3.1-8B-Instruct provider_id: groq provider_model_id: groq/llama3-8b-8192 model_type: llm @@ -127,7 +127,7 @@ models: provider_model_id: groq/llama3-70b-8192 model_type: llm - metadata: {} - model_id: meta-llama/Llama-3-70B-Instruct + model_id: groq/meta-llama/Llama-3-70B-Instruct provider_id: groq provider_model_id: groq/llama3-70b-8192 model_type: llm @@ -137,7 +137,7 @@ models: provider_model_id: groq/llama-3.3-70b-versatile model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct + model_id: groq/meta-llama/Llama-3.3-70B-Instruct provider_id: groq provider_model_id: groq/llama-3.3-70b-versatile model_type: llm @@ -147,7 +147,7 @@ models: provider_model_id: groq/llama-3.2-3b-preview model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct + model_id: groq/meta-llama/Llama-3.2-3B-Instruct provider_id: groq provider_model_id: groq/llama-3.2-3b-preview model_type: llm @@ -157,7 +157,7 @@ models: provider_model_id: groq/llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct provider_id: groq provider_model_id: groq/llama-4-scout-17b-16e-instruct model_type: llm @@ -167,7 +167,7 @@ models: provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct provider_id: groq provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct model_type: llm @@ -177,7 +177,7 @@ models: provider_model_id: groq/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct provider_id: groq provider_model_id: groq/llama-4-maverick-17b-128e-instruct model_type: llm @@ -187,7 +187,7 @@ models: provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct provider_id: groq provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct model_type: llm diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml index 26b48e9c9..fe3f4d8a6 100644 --- a/llama_stack/templates/starter/build.yaml +++ b/llama_stack/templates/starter/build.yaml @@ -5,6 +5,8 @@ distribution_spec: inference: - remote::openai - remote::fireworks + - remote::together + - remote::ollama - remote::anthropic - remote::gemini - remote::groq diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index ec4a81085..26f2e0f0d 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -21,6 +21,15 @@ providers: config: url: https://api.fireworks.ai/inference/v1 api_key: ${env.FIREWORKS_API_KEY:} + - provider_id: together + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:} + - provider_id: ollama + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:http://localhost:11434} - provider_id: anthropic provider_type: remote::anthropic config: @@ -163,72 +172,72 @@ models: provider_model_id: openai/chatgpt-4o-latest model_type: llm - metadata: {} - model_id: gpt-3.5-turbo-0125 + model_id: openai/gpt-3.5-turbo-0125 provider_id: openai provider_model_id: gpt-3.5-turbo-0125 model_type: llm - metadata: {} - model_id: gpt-3.5-turbo + model_id: openai/gpt-3.5-turbo provider_id: openai provider_model_id: gpt-3.5-turbo model_type: llm - metadata: {} - model_id: gpt-3.5-turbo-instruct + model_id: openai/gpt-3.5-turbo-instruct provider_id: openai provider_model_id: gpt-3.5-turbo-instruct model_type: llm - metadata: {} - model_id: gpt-4 + model_id: openai/gpt-4 provider_id: openai provider_model_id: gpt-4 model_type: llm - metadata: {} - model_id: gpt-4-turbo + model_id: openai/gpt-4-turbo provider_id: openai provider_model_id: gpt-4-turbo model_type: llm - metadata: {} - model_id: gpt-4o + model_id: openai/gpt-4o provider_id: openai provider_model_id: gpt-4o model_type: llm - metadata: {} - model_id: gpt-4o-2024-08-06 + model_id: openai/gpt-4o-2024-08-06 provider_id: openai provider_model_id: gpt-4o-2024-08-06 model_type: llm - metadata: {} - model_id: gpt-4o-mini + model_id: openai/gpt-4o-mini provider_id: openai provider_model_id: gpt-4o-mini model_type: llm - metadata: {} - model_id: gpt-4o-audio-preview + model_id: openai/gpt-4o-audio-preview provider_id: openai provider_model_id: gpt-4o-audio-preview model_type: llm - metadata: {} - model_id: chatgpt-4o-latest + model_id: openai/chatgpt-4o-latest provider_id: openai provider_model_id: chatgpt-4o-latest model_type: llm - metadata: {} - model_id: o1 + model_id: openai/o1 provider_id: openai provider_model_id: o1 model_type: llm - metadata: {} - model_id: o1-mini + model_id: openai/o1-mini provider_id: openai provider_model_id: o1-mini model_type: llm - metadata: {} - model_id: o3-mini + model_id: openai/o3-mini provider_id: openai provider_model_id: o3-mini model_type: llm - metadata: {} - model_id: o4-mini + model_id: openai/o4-mini provider_id: openai provider_model_id: o4-mini model_type: llm @@ -249,14 +258,14 @@ models: - metadata: embedding_dimension: 1536 context_length: 8192 - model_id: text-embedding-3-small + model_id: openai/text-embedding-3-small provider_id: openai provider_model_id: text-embedding-3-small model_type: embedding - metadata: embedding_dimension: 3072 context_length: 8192 - model_id: text-embedding-3-large + model_id: openai/text-embedding-3-large provider_id: openai provider_model_id: text-embedding-3-large model_type: embedding @@ -266,7 +275,7 @@ models: provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct + model_id: fireworks/meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm @@ -276,7 +285,7 @@ models: provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct + model_id: fireworks/meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm @@ -286,7 +295,7 @@ models: provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + model_id: fireworks/meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm @@ -296,7 +305,7 @@ models: provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct + model_id: fireworks/meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm @@ -306,7 +315,7 @@ models: provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + model_id: fireworks/meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm @@ -316,7 +325,7 @@ models: provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + model_id: fireworks/meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm @@ -326,7 +335,7 @@ models: provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct + model_id: fireworks/meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm @@ -336,7 +345,7 @@ models: provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm - metadata: {} - model_id: meta-llama/Llama-Guard-3-8B + model_id: fireworks/meta-llama/Llama-Guard-3-8B provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm @@ -346,7 +355,7 @@ models: provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision + model_id: fireworks/meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm @@ -356,7 +365,7 @@ models: provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic model_type: llm - metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_id: fireworks/meta-llama/Llama-4-Scout-17B-16E-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic model_type: llm @@ -366,17 +375,307 @@ models: provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic model_type: llm - metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + model_id: fireworks/meta-llama/Llama-4-Maverick-17B-128E-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic model_type: llm - metadata: embedding_dimension: 768 context_length: 8192 - model_id: nomic-ai/nomic-embed-text-v1.5 + model_id: fireworks/nomic-ai/nomic-embed-text-v1.5 provider_id: fireworks provider_model_id: nomic-ai/nomic-embed-text-v1.5 model_type: embedding +- metadata: {} + model_id: together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-3.1-8B-Instruct + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-3.1-70B-Instruct + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-3.2-3B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-3.2-3B-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-3.3-70B-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Meta-Llama-Guard-3-8B + provider_id: together + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-Guard-3-8B + provider_id: together + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-Guard-3-11B-Vision-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-Guard-3-11B-Vision + provider_id: together + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm +- metadata: + embedding_dimension: 768 + context_length: 8192 + model_id: togethercomputer/m2-bert-80M-8k-retrieval + provider_id: together + provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval + model_type: embedding +- metadata: + embedding_dimension: 768 + context_length: 32768 + model_id: togethercomputer/m2-bert-80M-32k-retrieval + provider_id: together + provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval + model_type: embedding +- metadata: {} + model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: ollama/llama3.1:8b-instruct-fp16 + provider_id: ollama + provider_model_id: llama3.1:8b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/meta-llama/Llama-3.1-8B-Instruct + provider_id: ollama + provider_model_id: llama3.1:8b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/llama3.1:8b + provider_id: ollama + provider_model_id: llama3.1:8b + model_type: llm +- metadata: {} + model_id: ollama/llama3.1:70b-instruct-fp16 + provider_id: ollama + provider_model_id: llama3.1:70b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/meta-llama/Llama-3.1-70B-Instruct + provider_id: ollama + provider_model_id: llama3.1:70b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/llama3.1:70b + provider_id: ollama + provider_model_id: llama3.1:70b + model_type: llm +- metadata: {} + model_id: ollama/llama3.1:405b-instruct-fp16 + provider_id: ollama + provider_model_id: llama3.1:405b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: ollama + provider_model_id: llama3.1:405b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/llama3.1:405b + provider_id: ollama + provider_model_id: llama3.1:405b + model_type: llm +- metadata: {} + model_id: ollama/llama3.2:1b-instruct-fp16 + provider_id: ollama + provider_model_id: llama3.2:1b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/meta-llama/Llama-3.2-1B-Instruct + provider_id: ollama + provider_model_id: llama3.2:1b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/llama3.2:1b + provider_id: ollama + provider_model_id: llama3.2:1b + model_type: llm +- metadata: {} + model_id: ollama/llama3.2:3b-instruct-fp16 + provider_id: ollama + provider_model_id: llama3.2:3b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/meta-llama/Llama-3.2-3B-Instruct + provider_id: ollama + provider_model_id: llama3.2:3b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/llama3.2:3b + provider_id: ollama + provider_model_id: llama3.2:3b + model_type: llm +- metadata: {} + model_id: ollama/llama3.2-vision:11b-instruct-fp16 + provider_id: ollama + provider_model_id: llama3.2-vision:11b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: ollama + provider_model_id: llama3.2-vision:11b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/llama3.2-vision:latest + provider_id: ollama + provider_model_id: llama3.2-vision:latest + model_type: llm +- metadata: {} + model_id: ollama/llama3.2-vision:90b-instruct-fp16 + provider_id: ollama + provider_model_id: llama3.2-vision:90b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: ollama + provider_model_id: llama3.2-vision:90b-instruct-fp16 + model_type: llm +- metadata: {} + model_id: ollama/llama3.2-vision:90b + provider_id: ollama + provider_model_id: llama3.2-vision:90b + model_type: llm +- metadata: {} + model_id: ollama/llama3.3:70b + provider_id: ollama + provider_model_id: llama3.3:70b + model_type: llm +- metadata: {} + model_id: ollama/meta-llama/Llama-3.3-70B-Instruct + provider_id: ollama + provider_model_id: llama3.3:70b + model_type: llm +- metadata: {} + model_id: ollama/llama-guard3:8b + provider_id: ollama + provider_model_id: llama-guard3:8b + model_type: llm +- metadata: {} + model_id: ollama/meta-llama/Llama-Guard-3-8B + provider_id: ollama + provider_model_id: llama-guard3:8b + model_type: llm +- metadata: {} + model_id: ollama/llama-guard3:1b + provider_id: ollama + provider_model_id: llama-guard3:1b + model_type: llm +- metadata: {} + model_id: ollama/meta-llama/Llama-Guard-3-1B + provider_id: ollama + provider_model_id: llama-guard3:1b + model_type: llm +- metadata: + embedding_dimension: 384 + context_length: 512 + model_id: ollama/all-minilm:latest + provider_id: ollama + provider_model_id: all-minilm:latest + model_type: embedding +- metadata: + embedding_dimension: 384 + context_length: 512 + model_id: ollama/all-minilm + provider_id: ollama + provider_model_id: all-minilm:latest + model_type: embedding +- metadata: + embedding_dimension: 768 + context_length: 8192 + model_id: ollama/nomic-embed-text + provider_id: ollama + provider_model_id: nomic-embed-text + model_type: embedding - metadata: {} model_id: anthropic/claude-3-5-sonnet-latest provider_id: anthropic @@ -436,7 +735,7 @@ models: provider_model_id: groq/llama3-8b-8192 model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct + model_id: groq/meta-llama/Llama-3.1-8B-Instruct provider_id: groq provider_model_id: groq/llama3-8b-8192 model_type: llm @@ -451,7 +750,7 @@ models: provider_model_id: groq/llama3-70b-8192 model_type: llm - metadata: {} - model_id: meta-llama/Llama-3-70B-Instruct + model_id: groq/meta-llama/Llama-3-70B-Instruct provider_id: groq provider_model_id: groq/llama3-70b-8192 model_type: llm @@ -461,7 +760,7 @@ models: provider_model_id: groq/llama-3.3-70b-versatile model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct + model_id: groq/meta-llama/Llama-3.3-70B-Instruct provider_id: groq provider_model_id: groq/llama-3.3-70b-versatile model_type: llm @@ -471,7 +770,7 @@ models: provider_model_id: groq/llama-3.2-3b-preview model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct + model_id: groq/meta-llama/Llama-3.2-3B-Instruct provider_id: groq provider_model_id: groq/llama-3.2-3b-preview model_type: llm @@ -481,7 +780,7 @@ models: provider_model_id: groq/llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct provider_id: groq provider_model_id: groq/llama-4-scout-17b-16e-instruct model_type: llm @@ -491,7 +790,7 @@ models: provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct provider_id: groq provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct model_type: llm @@ -501,7 +800,7 @@ models: provider_model_id: groq/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct provider_id: groq provider_model_id: groq/llama-4-maverick-17b-128e-instruct model_type: llm @@ -511,7 +810,7 @@ models: provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct provider_id: groq provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct model_type: llm @@ -521,7 +820,7 @@ models: provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct + model_id: sambanova/meta-llama/Llama-3.1-8B-Instruct provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct model_type: llm @@ -531,7 +830,7 @@ models: provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + model_id: sambanova/meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct model_type: llm @@ -541,7 +840,7 @@ models: provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct + model_id: sambanova/meta-llama/Llama-3.2-1B-Instruct provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct model_type: llm @@ -551,7 +850,7 @@ models: provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct + model_id: sambanova/meta-llama/Llama-3.2-3B-Instruct provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct model_type: llm @@ -561,7 +860,7 @@ models: provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct + model_id: sambanova/meta-llama/Llama-3.3-70B-Instruct provider_id: sambanova provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct model_type: llm @@ -571,7 +870,7 @@ models: provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + model_id: sambanova/meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: sambanova provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct model_type: llm @@ -581,7 +880,7 @@ models: provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + model_id: sambanova/meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: sambanova provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct model_type: llm @@ -591,7 +890,7 @@ models: provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_id: sambanova/meta-llama/Llama-4-Scout-17B-16E-Instruct provider_id: sambanova provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct model_type: llm @@ -601,7 +900,7 @@ models: provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct model_type: llm - metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + model_id: sambanova/meta-llama/Llama-4-Maverick-17B-128E-Instruct provider_id: sambanova provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct model_type: llm @@ -611,7 +910,7 @@ models: provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm - metadata: {} - model_id: meta-llama/Llama-Guard-3-8B + model_id: sambanova/meta-llama/Llama-Guard-3-8B provider_id: sambanova provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index efec5fe69..ff9bee160 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -34,6 +34,10 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig from llama_stack.providers.remote.inference.groq.models import ( MODEL_ENTRIES as GROQ_MODEL_ENTRIES, ) +from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig +from llama_stack.providers.remote.inference.ollama.models import ( + MODEL_ENTRIES as OLLAMA_MODEL_ENTRIES, +) from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.models import ( MODEL_ENTRIES as OPENAI_MODEL_ENTRIES, @@ -42,6 +46,10 @@ from llama_stack.providers.remote.inference.sambanova.config import SambaNovaImp from llama_stack.providers.remote.inference.sambanova.models import ( MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES, ) +from llama_stack.providers.remote.inference.together.config import TogetherImplConfig +from llama_stack.providers.remote.inference.together.models import ( + MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES, +) from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig from llama_stack.providers.remote.vector_io.pgvector.config import ( @@ -69,6 +77,16 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo FIREWORKS_MODEL_ENTRIES, FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"), ), + ( + "together", + TOGETHER_MODEL_ENTRIES, + TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"), + ), + ( + "ollama", + OLLAMA_MODEL_ENTRIES, + OllamaImplConfig.sample_run_config(), + ), ( "anthropic", ANTHROPIC_MODEL_ENTRIES, diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index f00be28ad..b1ffba5be 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -8,6 +8,7 @@ from pathlib import Path from typing import Literal import jinja2 +import rich import yaml from pydantic import BaseModel, Field @@ -36,13 +37,35 @@ def get_model_registry( available_models: dict[str, list[ProviderModelEntry]], ) -> list[ModelInput]: models = [] + + # check for conflicts in model ids + all_ids = set() + ids_conflict = False + + for _, entries in available_models.items(): + for entry in entries: + ids = [entry.provider_model_id] + entry.aliases + for model_id in ids: + if model_id in all_ids: + ids_conflict = True + rich.print( + f"[yellow]Model id {model_id} conflicts; all model ids will be prefixed with provider id[/yellow]" + ) + break + all_ids.update(ids) + if ids_conflict: + break + if ids_conflict: + break + for provider_id, entries in available_models.items(): for entry in entries: ids = [entry.provider_model_id] + entry.aliases for model_id in ids: + identifier = f"{provider_id}/{model_id}" if ids_conflict and provider_id not in model_id else model_id models.append( ModelInput( - model_id=model_id, + model_id=identifier, provider_model_id=entry.provider_model_id, provider_id=provider_id, model_type=entry.model_type, diff --git a/llama_stack/templates/verification/__init__.py b/llama_stack/templates/verification/__init__.py deleted file mode 100644 index 5d8c281a6..000000000 --- a/llama_stack/templates/verification/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .verification import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/verification/build.yaml b/llama_stack/templates/verification/build.yaml deleted file mode 100644 index ce083dbba..000000000 --- a/llama_stack/templates/verification/build.yaml +++ /dev/null @@ -1,40 +0,0 @@ -version: '2' -distribution_spec: - description: Distribution for running e2e tests in CI - providers: - inference: - - remote::openai - - remote::fireworks-openai-compat - - remote::together-openai-compat - - remote::groq-openai-compat - - remote::sambanova-openai-compat - - remote::cerebras-openai-compat - - inline::sentence-transformers - vector_io: - - inline::sqlite-vec - - remote::chromadb - - remote::pgvector - safety: - - inline::llama-guard - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime - - remote::model-context-protocol -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/verification/run.yaml b/llama_stack/templates/verification/run.yaml deleted file mode 100644 index a11991f6e..000000000 --- a/llama_stack/templates/verification/run.yaml +++ /dev/null @@ -1,731 +0,0 @@ -version: '2' -image_name: verification -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY:} - - provider_id: fireworks-openai-compat - provider_type: remote::fireworks-openai-compat - config: - openai_compat_api_base: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:} - - provider_id: together-openai-compat - provider_type: remote::together-openai-compat - config: - openai_compat_api_base: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} - - provider_id: groq-openai-compat - provider_type: remote::groq-openai-compat - config: - openai_compat_api_base: https://api.groq.com/openai/v1 - api_key: ${env.GROQ_API_KEY:} - - provider_id: sambanova-openai-compat - provider_type: remote::sambanova-openai-compat - config: - openai_compat_api_base: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY:} - - provider_id: cerebras-openai-compat - provider_type: remote::cerebras-openai-compat - config: - openai_compat_api_base: https://api.cerebras.ai/v1 - api_key: ${env.CEREBRAS_API_KEY:} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: sqlite-vec - provider_type: inline::sqlite-vec - config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} - provider_type: remote::pgvector - config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/inference_store.db -models: -- metadata: {} - model_id: openai/gpt-4o - provider_id: openai - provider_model_id: openai/gpt-4o - model_type: llm -- metadata: {} - model_id: openai/gpt-4o-mini - provider_id: openai - provider_model_id: openai/gpt-4o-mini - model_type: llm -- metadata: {} - model_id: openai/chatgpt-4o-latest - provider_id: openai - provider_model_id: openai/chatgpt-4o-latest - model_type: llm -- metadata: {} - model_id: gpt-3.5-turbo-0125 - provider_id: openai - provider_model_id: gpt-3.5-turbo-0125 - model_type: llm -- metadata: {} - model_id: gpt-3.5-turbo - provider_id: openai - provider_model_id: gpt-3.5-turbo - model_type: llm -- metadata: {} - model_id: gpt-3.5-turbo-instruct - provider_id: openai - provider_model_id: gpt-3.5-turbo-instruct - model_type: llm -- metadata: {} - model_id: gpt-4 - provider_id: openai - provider_model_id: gpt-4 - model_type: llm -- metadata: {} - model_id: gpt-4-turbo - provider_id: openai - provider_model_id: gpt-4-turbo - model_type: llm -- metadata: {} - model_id: gpt-4o - provider_id: openai - provider_model_id: gpt-4o - model_type: llm -- metadata: {} - model_id: gpt-4o-2024-08-06 - provider_id: openai - provider_model_id: gpt-4o-2024-08-06 - model_type: llm -- metadata: {} - model_id: gpt-4o-mini - provider_id: openai - provider_model_id: gpt-4o-mini - model_type: llm -- metadata: {} - model_id: gpt-4o-audio-preview - provider_id: openai - provider_model_id: gpt-4o-audio-preview - model_type: llm -- metadata: {} - model_id: chatgpt-4o-latest - provider_id: openai - provider_model_id: chatgpt-4o-latest - model_type: llm -- metadata: {} - model_id: o1 - provider_id: openai - provider_model_id: o1 - model_type: llm -- metadata: {} - model_id: o1-mini - provider_id: openai - provider_model_id: o1-mini - model_type: llm -- metadata: {} - model_id: o3-mini - provider_id: openai - provider_model_id: o3-mini - model_type: llm -- metadata: {} - model_id: o4-mini - provider_id: openai - provider_model_id: o4-mini - model_type: llm -- metadata: - embedding_dimension: 1536 - context_length: 8192 - model_id: openai/text-embedding-3-small - provider_id: openai - provider_model_id: openai/text-embedding-3-small - model_type: embedding -- metadata: - embedding_dimension: 3072 - context_length: 8192 - model_id: openai/text-embedding-3-large - provider_id: openai - provider_model_id: openai/text-embedding-3-large - model_type: embedding -- metadata: - embedding_dimension: 1536 - context_length: 8192 - model_id: text-embedding-3-small - provider_id: openai - provider_model_id: text-embedding-3-small - model_type: embedding -- metadata: - embedding_dimension: 3072 - context_length: 8192 - model_id: text-embedding-3-large - provider_id: openai - provider_model_id: text-embedding-3-large - model_type: embedding -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-guard-3-8b - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-8B - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-guard-3-11b-vision - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama4-scout-instruct-basic - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: fireworks-openai-compat - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: nomic-ai/nomic-embed-text-v1.5 - provider_id: fireworks-openai-compat - provider_model_id: nomic-ai/nomic-embed-text-v1.5 - model_type: embedding -- metadata: {} - model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - provider_id: together-openai-compat - provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: together-openai-compat - provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - provider_id: together-openai-compat - provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: together-openai-compat - provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - provider_id: together-openai-compat - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: together-openai-compat - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Meta-Llama-Guard-3-8B - provider_id: together-openai-compat - provider_model_id: meta-llama/Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-8B - provider_id: together-openai-compat - provider_model_id: meta-llama/Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: togethercomputer/m2-bert-80M-8k-retrieval - provider_id: together-openai-compat - provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval - model_type: embedding -- metadata: - embedding_dimension: 768 - context_length: 32768 - model_id: togethercomputer/m2-bert-80M-32k-retrieval - provider_id: together-openai-compat - provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval - model_type: embedding -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: together-openai-compat - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: groq/llama3-8b-8192 - provider_id: groq-openai-compat - provider_model_id: groq/llama3-8b-8192 - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: groq-openai-compat - provider_model_id: groq/llama3-8b-8192 - model_type: llm -- metadata: {} - model_id: groq/llama-3.1-8b-instant - provider_id: groq-openai-compat - provider_model_id: groq/llama-3.1-8b-instant - model_type: llm -- metadata: {} - model_id: groq/llama3-70b-8192 - provider_id: groq-openai-compat - provider_model_id: groq/llama3-70b-8192 - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3-70B-Instruct - provider_id: groq-openai-compat - provider_model_id: groq/llama3-70b-8192 - model_type: llm -- metadata: {} - model_id: groq/llama-3.3-70b-versatile - provider_id: groq-openai-compat - provider_model_id: groq/llama-3.3-70b-versatile - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: groq-openai-compat - provider_model_id: groq/llama-3.3-70b-versatile - model_type: llm -- metadata: {} - model_id: groq/llama-3.2-3b-preview - provider_id: groq-openai-compat - provider_model_id: groq/llama-3.2-3b-preview - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: groq-openai-compat - provider_model_id: groq/llama-3.2-3b-preview - model_type: llm -- metadata: {} - model_id: groq/llama-4-scout-17b-16e-instruct - provider_id: groq-openai-compat - provider_model_id: groq/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: groq-openai-compat - provider_model_id: groq/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct - provider_id: groq-openai-compat - provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: groq-openai-compat - provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: groq/llama-4-maverick-17b-128e-instruct - provider_id: groq-openai-compat - provider_model_id: groq/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: groq-openai-compat - provider_model_id: groq/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct - provider_id: groq-openai-compat - provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: groq-openai-compat - provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: sambanova/Meta-Llama-3.1-8B-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Meta-Llama-3.1-405B-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Meta-Llama-3.2-1B-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Meta-Llama-3.2-3B-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Meta-Llama-3.3-70B-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Llama-3.2-11B-Vision-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Llama-3.2-90B-Vision-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Llama-4-Scout-17B-16E-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Meta-Llama-Guard-3-8B - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-8B - provider_id: sambanova-openai-compat - provider_model_id: sambanova/Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: llama3.1-8b - provider_id: cerebras-openai-compat - provider_model_id: llama3.1-8b - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: cerebras-openai-compat - provider_model_id: llama3.1-8b - model_type: llm -- metadata: {} - model_id: llama-3.3-70b - provider_id: cerebras-openai-compat - provider_model_id: llama-3.3-70b - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: cerebras-openai-compat - provider_model_id: llama-3.3-70b - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/verification/verification.py b/llama_stack/templates/verification/verification.py deleted file mode 100644 index b58400f26..000000000 --- a/llama_stack/templates/verification/verification.py +++ /dev/null @@ -1,201 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from llama_stack.apis.models.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( - SQLiteVectorIOConfig, -) -from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES as CEREBRAS_MODEL_ENTRIES -from llama_stack.providers.remote.inference.cerebras_openai_compat.config import CerebrasCompatConfig -from llama_stack.providers.remote.inference.fireworks.models import ( - MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.fireworks_openai_compat.config import FireworksCompatConfig -from llama_stack.providers.remote.inference.groq.models import ( - MODEL_ENTRIES as GROQ_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.groq_openai_compat.config import GroqCompatConfig -from llama_stack.providers.remote.inference.openai.config import OpenAIConfig -from llama_stack.providers.remote.inference.openai.models import ( - MODEL_ENTRIES as OPENAI_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES -from llama_stack.providers.remote.inference.sambanova_openai_compat.config import SambaNovaCompatConfig -from llama_stack.providers.remote.inference.together.models import ( - MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES, -) -from llama_stack.providers.remote.inference.together_openai_compat.config import TogetherCompatConfig -from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig -from llama_stack.providers.remote.vector_io.pgvector.config import ( - PGVectorVectorIOConfig, -) -from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry -from llama_stack.templates.template import ( - DistributionTemplate, - RunConfigSettings, - get_model_registry, -) - - -def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]: - # in this template, we allow each API key to be optional - providers = [ - ( - "openai", - OPENAI_MODEL_ENTRIES, - OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"), - ), - ( - "fireworks-openai-compat", - FIREWORKS_MODEL_ENTRIES, - FireworksCompatConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"), - ), - ( - "together-openai-compat", - TOGETHER_MODEL_ENTRIES, - TogetherCompatConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"), - ), - ( - "groq-openai-compat", - GROQ_MODEL_ENTRIES, - GroqCompatConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"), - ), - ( - "sambanova-openai-compat", - SAMBANOVA_MODEL_ENTRIES, - SambaNovaCompatConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"), - ), - ( - "cerebras-openai-compat", - CEREBRAS_MODEL_ENTRIES, - CerebrasCompatConfig.sample_run_config(api_key="${env.CEREBRAS_API_KEY:}"), - ), - ] - inference_providers = [] - available_models = {} - for provider_id, model_entries, config in providers: - inference_providers.append( - Provider( - provider_id=provider_id, - provider_type=f"remote::{provider_id}", - config=config, - ) - ) - available_models[provider_id] = model_entries - return inference_providers, available_models - - -def get_distribution_template() -> DistributionTemplate: - inference_providers, available_models = get_inference_providers() - providers = { - "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]), - "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - "remote::model-context-protocol", - ], - } - name = "verification" - - vector_io_providers = [ - Provider( - provider_id="sqlite-vec", - provider_type="inline::sqlite-vec", - config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ), - Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", - provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), - ), - Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", - provider_type="remote::pgvector", - config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", - ), - ), - ] - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id=embedding_provider.provider_id, - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - - default_models = get_model_registry(available_models) - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Distribution for running e2e tests in CI", - container_image=None, - template_path=None, - providers=providers, - available_models_by_provider=available_models, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": inference_providers + [embedding_provider], - "vector_io": vector_io_providers, - }, - default_models=default_models + [embedding_model], - default_tool_groups=default_tool_groups, - default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "FIREWORKS_API_KEY": ( - "", - "Fireworks API Key", - ), - "OPENAI_API_KEY": ( - "", - "OpenAI API Key", - ), - }, - )