diff --git a/llama_stack/templates/ci-tests/build.yaml b/llama_stack/templates/ci-tests/build.yaml
index 625e36e4f..2f18e5d26 100644
--- a/llama_stack/templates/ci-tests/build.yaml
+++ b/llama_stack/templates/ci-tests/build.yaml
@@ -3,57 +3,98 @@ distribution_spec:
   description: CI tests for Llama Stack
   providers:
     inference:
-    - remote::cerebras
-    - remote::ollama
-    - remote::vllm
-    - remote::tgi
-    - remote::hf::serverless
-    - remote::hf::endpoint
-    - remote::fireworks
-    - remote::together
-    - remote::bedrock
-    - remote::databricks
-    - remote::nvidia
-    - remote::runpod
-    - remote::openai
-    - remote::anthropic
-    - remote::gemini
-    - remote::groq
-    - remote::llama-openai-compat
-    - remote::sambanova
-    - remote::passthrough
-    - inline::sentence-transformers
+    - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__}
+      provider_type: remote::cerebras
+    - provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
+      provider_type: remote::ollama
+    - provider_id: ${env.ENABLE_VLLM:=__disabled__}
+      provider_type: remote::vllm
+    - provider_id: ${env.ENABLE_TGI:=__disabled__}
+      provider_type: remote::tgi
+    - provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__}
+      provider_type: remote::hf::serverless
+    - provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__}
+      provider_type: remote::hf::endpoint
+    - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__}
+      provider_type: remote::fireworks
+    - provider_id: ${env.ENABLE_TOGETHER:=__disabled__}
+      provider_type: remote::together
+    - provider_id: ${env.ENABLE_BEDROCK:=__disabled__}
+      provider_type: remote::bedrock
+    - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__}
+      provider_type: remote::databricks
+    - provider_id: ${env.ENABLE_NVIDIA:=__disabled__}
+      provider_type: remote::nvidia
+    - provider_id: ${env.ENABLE_RUNPOD:=__disabled__}
+      provider_type: remote::runpod
+    - provider_id: ${env.ENABLE_OPENAI:=__disabled__}
+      provider_type: remote::openai
+    - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__}
+      provider_type: remote::anthropic
+    - provider_id: ${env.ENABLE_GEMINI:=__disabled__}
+      provider_type: remote::gemini
+    - provider_id: ${env.ENABLE_GROQ:=__disabled__}
+      provider_type: remote::groq
+    - provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__}
+      provider_type: remote::llama-openai-compat
+    - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__}
+      provider_type: remote::sambanova
+    - provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__}
+      provider_type: remote::passthrough
+    - provider_id: sentence-transformers
+      provider_type: inline::sentence-transformers
     vector_io:
-    - inline::faiss
-    - inline::sqlite-vec
-    - inline::milvus
-    - remote::chromadb
-    - remote::pgvector
+    - provider_id: ${env.ENABLE_FAISS:=faiss}
+      provider_type: inline::faiss
+    - provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__}
+      provider_type: inline::sqlite-vec
+    - provider_id: ${env.ENABLE_MILVUS:=__disabled__}
+      provider_type: inline::milvus
+    - provider_id: ${env.ENABLE_CHROMADB:=__disabled__}
+      provider_type: remote::chromadb
+    - provider_id: ${env.ENABLE_PGVECTOR:=__disabled__}
+      provider_type: remote::pgvector
     files:
-    - inline::localfs
+    - provider_id: localfs
+      provider_type: inline::localfs
     safety:
-    - inline::llama-guard
+    - provider_id: llama-guard
+      provider_type: inline::llama-guard
     agents:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     telemetry:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     post_training:
-    - inline::huggingface
+    - provider_id: huggingface
+      provider_type: inline::huggingface
     eval:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     datasetio:
-    - remote::huggingface
-    - inline::localfs
+    - provider_id: huggingface
+      provider_type: remote::huggingface
+    - provider_id: localfs
+      provider_type: inline::localfs
     scoring:
-    - inline::basic
-    - inline::llm-as-judge
-    - inline::braintrust
+    - provider_id: basic
+      provider_type: inline::basic
+    - provider_id: llm-as-judge
+      provider_type: inline::llm-as-judge
+    - provider_id: braintrust
+      provider_type: inline::braintrust
     tool_runtime:
-    - remote::brave-search
-    - remote::tavily-search
-    - inline::rag-runtime
-    - remote::model-context-protocol
+    - provider_id: brave-search
+      provider_type: remote::brave-search
+    - provider_id: tavily-search
+      provider_type: remote::tavily-search
+    - provider_id: rag-runtime
+      provider_type: inline::rag-runtime
+    - provider_id: model-context-protocol
+      provider_type: remote::model-context-protocol
 image_type: conda
+image_name: ci-tests
 additional_pip_packages:
 - aiosqlite
 - asyncpg
diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml
index 1396d54a8..6f8a192ee 100644
--- a/llama_stack/templates/ci-tests/run.yaml
+++ b/llama_stack/templates/ci-tests/run.yaml
@@ -56,7 +56,6 @@ providers:
       api_key: ${env.TOGETHER_API_KEY}
   - provider_id: ${env.ENABLE_BEDROCK:=__disabled__}
     provider_type: remote::bedrock
-    config: {}
   - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__}
     provider_type: remote::databricks
     config:
@@ -107,7 +106,6 @@ providers:
       api_key: ${env.PASSTHROUGH_API_KEY}
   - provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers}
     provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
   - provider_id: ${env.ENABLE_FAISS:=faiss}
     provider_type: inline::faiss
@@ -208,10 +206,8 @@ providers:
   scoring:
   - provider_id: basic
     provider_type: inline::basic
-    config: {}
   - provider_id: llm-as-judge
     provider_type: inline::llm-as-judge
-    config: {}
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
@@ -229,10 +225,8 @@ providers:
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
-    config: {}
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
-    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db
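
[note] The `${env.ENABLE_*:=__disabled__}` strings that now appear as provider IDs are bash-style defaults, resolved by llama-stack's env substitution when the config is loaded; a provider whose ID resolves to `__disabled__` is dropped from the stack. A minimal sketch of the `:=` rule (the `resolve` helper is hypothetical; the real logic lives in the stack's config parsing):

    import os
    import re

    _PATTERN = re.compile(r"\$\{env\.(\w+):=([^}]*)\}")

    def resolve(value: str) -> str:
        # ${env.NAME:=default} -> value of NAME if set and non-empty, else default.
        return _PATTERN.sub(lambda m: os.environ.get(m.group(1)) or m.group(2), value)

    os.environ.pop("ENABLE_OLLAMA", None)
    assert resolve("${env.ENABLE_OLLAMA:=__disabled__}") == "__disabled__"  # provider skipped
    os.environ["ENABLE_OLLAMA"] = "ollama"
    assert resolve("${env.ENABLE_OLLAMA:=__disabled__}") == "ollama"        # provider active
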
diff --git a/llama_stack/templates/dell/build.yaml b/llama_stack/templates/dell/build.yaml
index ff8d58a08..d19934ee5 100644
--- a/llama_stack/templates/dell/build.yaml
+++ b/llama_stack/templates/dell/build.yaml
@@ -4,32 +4,50 @@
     container
   providers:
     inference:
-    - remote::tgi
-    - inline::sentence-transformers
+    - provider_id: tgi
+      provider_type: remote::tgi
+    - provider_id: sentence-transformers
+      provider_type: inline::sentence-transformers
     vector_io:
-    - inline::faiss
-    - remote::chromadb
-    - remote::pgvector
+    - provider_id: faiss
+      provider_type: inline::faiss
+    - provider_id: chromadb
+      provider_type: remote::chromadb
+    - provider_id: pgvector
+      provider_type: remote::pgvector
     safety:
-    - inline::llama-guard
+    - provider_id: llama-guard
+      provider_type: inline::llama-guard
     agents:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     telemetry:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     eval:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     datasetio:
-    - remote::huggingface
-    - inline::localfs
+    - provider_id: huggingface
+      provider_type: remote::huggingface
+    - provider_id: localfs
+      provider_type: inline::localfs
     scoring:
-    - inline::basic
-    - inline::llm-as-judge
-    - inline::braintrust
+    - provider_id: basic
+      provider_type: inline::basic
+    - provider_id: llm-as-judge
+      provider_type: inline::llm-as-judge
+    - provider_id: braintrust
+      provider_type: inline::braintrust
     tool_runtime:
-    - remote::brave-search
-    - remote::tavily-search
-    - inline::rag-runtime
+    - provider_id: brave-search
+      provider_type: remote::brave-search
+    - provider_id: tavily-search
+      provider_type: remote::tavily-search
+    - provider_id: rag-runtime
+      provider_type: inline::rag-runtime
 image_type: conda
+image_name: dell
 additional_pip_packages:
 - aiosqlite
 - sqlalchemy[asyncio]
diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py
index 5a6f52a89..b2210e7dc 100644
--- a/llama_stack/templates/dell/dell.py
+++ b/llama_stack/templates/dell/dell.py
@@ -19,18 +19,32 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::tgi", "inline::sentence-transformers"],
-        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
-        "safety": ["inline::llama-guard"],
-        "agents": ["inline::meta-reference"],
-        "telemetry": ["inline::meta-reference"],
-        "eval": ["inline::meta-reference"],
-        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "inference": [
+            Provider(provider_id="tgi", provider_type="remote::tgi"),
+            Provider(provider_id="sentence-transformers", provider_type="inline::sentence-transformers"),
+        ],
+        "vector_io": [
+            Provider(provider_id="faiss", provider_type="inline::faiss"),
+            Provider(provider_id="chromadb", provider_type="remote::chromadb"),
+            Provider(provider_id="pgvector", provider_type="remote::pgvector"),
+        ],
+        "safety": [Provider(provider_id="llama-guard", provider_type="inline::llama-guard")],
+        "agents": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
+        "telemetry": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
+        "eval": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
+        "datasetio": [
+            Provider(provider_id="huggingface", provider_type="remote::huggingface"),
+            Provider(provider_id="localfs", provider_type="inline::localfs"),
+        ],
+        "scoring": [
+            Provider(provider_id="basic", provider_type="inline::basic"),
+            Provider(provider_id="llm-as-judge", provider_type="inline::llm-as-judge"),
+            Provider(provider_id="braintrust", provider_type="inline::braintrust"),
+        ],
         "tool_runtime": [
-            "remote::brave-search",
-            "remote::tavily-search",
-            "inline::rag-runtime",
+            Provider(provider_id="brave-search", provider_type="remote::brave-search"),
+            Provider(provider_id="tavily-search", provider_type="remote::tavily-search"),
+            Provider(provider_id="rag-runtime", provider_type="inline::rag-runtime"),
         ],
     }
     name = "dell"
diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml
index 768fad4fa..ecc6729eb 100644
--- a/llama_stack/templates/dell/run-with-safety.yaml
+++ b/llama_stack/templates/dell/run-with-safety.yaml
@@ -22,7 +22,6 @@ providers:
       url: ${env.DEH_SAFETY_URL}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
   - provider_id: chromadb
     provider_type: remote::chromadb
@@ -74,10 +73,8 @@ providers:
   scoring:
   - provider_id: basic
     provider_type: inline::basic
-    config: {}
   - provider_id: llm-as-judge
     provider_type: inline::llm-as-judge
-    config: {}
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
@@ -95,7 +92,6 @@ providers:
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
-    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db
diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml
index de2ada009..fc2553526 100644
--- a/llama_stack/templates/dell/run.yaml
+++ b/llama_stack/templates/dell/run.yaml
@@ -18,7 +18,6 @@ providers:
       url: ${env.DEH_URL}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
   - provider_id: chromadb
     provider_type: remote::chromadb
@@ -70,10 +69,8 @@ providers:
   scoring:
   - provider_id: basic
     provider_type: inline::basic
-    config: {}
   - provider_id: llm-as-judge
     provider_type: inline::llm-as-judge
-    config: {}
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
@@ -91,7 +88,6 @@ providers:
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
-    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db
diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml
index 2119eeddd..0a0bc0aea 100644
--- a/llama_stack/templates/meta-reference-gpu/build.yaml
+++ b/llama_stack/templates/meta-reference-gpu/build.yaml
@@ -3,32 +3,50 @@ distribution_spec:
   description: Use Meta Reference for running LLM inference
   providers:
     inference:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     vector_io:
-    - inline::faiss
-    - remote::chromadb
-    - remote::pgvector
+    - provider_id: faiss
+      provider_type: inline::faiss
+    - provider_id: chromadb
+      provider_type: remote::chromadb
+    - provider_id: pgvector
+      provider_type: remote::pgvector
     safety:
-    - inline::llama-guard
+    - provider_id: llama-guard
+      provider_type: inline::llama-guard
     agents:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     telemetry:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     eval:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     datasetio:
-    - remote::huggingface
-    - inline::localfs
+    - provider_id: huggingface
+      provider_type: remote::huggingface
+    - provider_id: localfs
+      provider_type: inline::localfs
     scoring:
-    - inline::basic
-    - inline::llm-as-judge
-    - inline::braintrust
+    - provider_id: basic
+      provider_type: inline::basic
+    - provider_id: llm-as-judge
+      provider_type: inline::llm-as-judge
+    - provider_id: braintrust
+      provider_type: inline::braintrust
     tool_runtime:
-    - remote::brave-search
-    - remote::tavily-search
-    - inline::rag-runtime
-    - remote::model-context-protocol
+    - provider_id: brave-search
+      provider_type: remote::brave-search
+    - provider_id: tavily-search
+      provider_type: remote::tavily-search
+    - provider_id: rag-runtime
+      provider_type: inline::rag-runtime
+    - provider_id: model-context-protocol
+      provider_type: remote::model-context-protocol
 image_type: conda
+image_name: meta-reference-gpu
 additional_pip_packages:
 - aiosqlite
 - sqlalchemy[asyncio]
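
[note] dell.py above shows the pattern this PR applies to every template: the providers map moves from bare type strings to Provider objects, so a provider's ID is stated explicitly instead of being derived from its type string. A condensed before/after sketch (the Provider import path is assumed; config defaults to empty here):

    from llama_stack.distribution.datatypes import Provider  # import path assumed

    # Before: the ID was implied by the type ("remote::tgi" -> "tgi").
    providers_old = {"inference": ["remote::tgi", "inline::sentence-transformers"]}

    # After: IDs are explicit, so they can diverge from the type,
    # e.g. postgres-demo names its remote::vllm provider "vllm-inference".
    providers_new = {
        "inference": [
            Provider(provider_id="tgi", provider_type="remote::tgi"),
            Provider(provider_id="sentence-transformers", provider_type="inline::sentence-transformers"),
        ],
    }
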
diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py
index 4bfb4e9d8..6ca500eff 100644
--- a/llama_stack/templates/meta-reference-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py
@@ -25,19 +25,91 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["inline::meta-reference"],
-        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
-        "safety": ["inline::llama-guard"],
-        "agents": ["inline::meta-reference"],
-        "telemetry": ["inline::meta-reference"],
-        "eval": ["inline::meta-reference"],
-        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "inference": [
+            Provider(
+                provider_id="meta-reference",
+                provider_type="inline::meta-reference",
+            )
+        ],
+        "vector_io": [
+            Provider(
+                provider_id="faiss",
+                provider_type="inline::faiss",
+            ),
+            Provider(
+                provider_id="chromadb",
+                provider_type="remote::chromadb",
+            ),
+            Provider(
+                provider_id="pgvector",
+                provider_type="remote::pgvector",
+            ),
+        ],
+        "safety": [
+            Provider(
+                provider_id="llama-guard",
+                provider_type="inline::llama-guard",
+            )
+        ],
+        "agents": [
+            Provider(
+                provider_id="meta-reference",
+                provider_type="inline::meta-reference",
+            )
+        ],
+        "telemetry": [
+            Provider(
+                provider_id="meta-reference",
+                provider_type="inline::meta-reference",
+            )
+        ],
+        "eval": [
+            Provider(
+                provider_id="meta-reference",
+                provider_type="inline::meta-reference",
+            )
+        ],
+        "datasetio": [
+            Provider(
+                provider_id="huggingface",
+                provider_type="remote::huggingface",
+            ),
+            Provider(
+                provider_id="localfs",
+                provider_type="inline::localfs",
+            ),
+        ],
+        "scoring": [
+            Provider(
+                provider_id="basic",
+                provider_type="inline::basic",
+            ),
+            Provider(
+                provider_id="llm-as-judge",
+                provider_type="inline::llm-as-judge",
+            ),
+            Provider(
+                provider_id="braintrust",
+                provider_type="inline::braintrust",
+            ),
+        ],
         "tool_runtime": [
-            "remote::brave-search",
-            "remote::tavily-search",
-            "inline::rag-runtime",
-            "remote::model-context-protocol",
+            Provider(
+                provider_id="brave-search",
+                provider_type="remote::brave-search",
+            ),
+            Provider(
+                provider_id="tavily-search",
+                provider_type="remote::tavily-search",
+            ),
+            Provider(
+                provider_id="rag-runtime",
+                provider_type="inline::rag-runtime",
+            ),
+            Provider(
+                provider_id="model-context-protocol",
+                provider_type="remote::model-context-protocol",
+            ),
         ],
     }
     name = "meta-reference-gpu"
diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
index 49657a680..910f9ec46 100644
--- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
@@ -24,7 +24,6 @@ providers:
       max_seq_len: ${env.MAX_SEQ_LEN:=4096}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
-    config: {}
   - provider_id: meta-reference-safety
     provider_type: inline::meta-reference
     config:
@@ -88,10 +87,8 @@ providers:
   scoring:
   - provider_id: basic
     provider_type: inline::basic
-    config: {}
   - provider_id: llm-as-judge
     provider_type: inline::llm-as-judge
-    config: {}
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
@@ -109,10 +106,8 @@ providers:
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
-    config: {}
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
-    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db
diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml
index 2923b5faf..5266f3c84 100644
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@@ -24,7 +24,6 @@ providers:
       max_seq_len: ${env.MAX_SEQ_LEN:=4096}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -78,10 +77,8 @@ providers:
   scoring:
   - provider_id: basic
     provider_type: inline::basic
-    config: {}
   - provider_id: llm-as-judge
     provider_type: inline::llm-as-judge
-    config: {}
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
@@ -99,10 +96,8 @@ providers:
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
-    config: {}
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
-    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db
diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml
index 51685b2e3..572a70408 100644
--- a/llama_stack/templates/nvidia/build.yaml
+++ b/llama_stack/templates/nvidia/build.yaml
@@ -3,27 +3,39 @@ distribution_spec:
   description: Use NVIDIA NIM for running LLM inference, evaluation and safety
   providers:
     inference:
-    - remote::nvidia
+    - provider_id: nvidia
+      provider_type: remote::nvidia
     vector_io:
-    - inline::faiss
+    - provider_id: faiss
+      provider_type: inline::faiss
     safety:
-    - remote::nvidia
+    - provider_id: nvidia
+      provider_type: remote::nvidia
     agents:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     telemetry:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     eval:
-    - remote::nvidia
+    - provider_id: nvidia
+      provider_type: remote::nvidia
     post_training:
-    - remote::nvidia
+    - provider_id: nvidia
+      provider_type: remote::nvidia
     datasetio:
-    - inline::localfs
-    - remote::nvidia
+    - provider_id: localfs
+      provider_type: inline::localfs
+    - provider_id: nvidia
+      provider_type: remote::nvidia
     scoring:
-    - inline::basic
+    - provider_id: basic
+      provider_type: inline::basic
     tool_runtime:
-    - inline::rag-runtime
+    - provider_id: rag-runtime
+      provider_type: inline::rag-runtime
 image_type: conda
+image_name: nvidia
 additional_pip_packages:
 - aiosqlite
 - sqlalchemy[asyncio]
provider_id="nvidia", + provider_type="remote::nvidia", + ) + ], + "vector_io": [ + Provider( + provider_id="faiss", + provider_type="inline::faiss", + ) + ], + "safety": [ + Provider( + provider_id="nvidia", + provider_type="remote::nvidia", + ) + ], + "agents": [ + Provider( + provider_id="meta-reference", + provider_type="inline::meta-reference", + ) + ], + "telemetry": [ + Provider( + provider_id="meta-reference", + provider_type="inline::meta-reference", + ) + ], + "eval": [ + Provider( + provider_id="nvidia", + provider_type="remote::nvidia", + ) + ], + "post_training": [Provider(provider_id="nvidia", provider_type="remote::nvidia", config={})], + "datasetio": [ + Provider( + provider_id="localfs", + provider_type="inline::localfs", + ), + Provider( + provider_id="nvidia", + provider_type="remote::nvidia", + ), + ], + "scoring": [ + Provider( + provider_id="basic", + provider_type="inline::basic", + ) + ], + "tool_runtime": [ + Provider( + provider_id="rag-runtime", + provider_type="inline::rag-runtime", + ) + ], } inference_provider = Provider( diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index 7017a5955..015724050 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -85,11 +85,9 @@ providers: scoring: - provider_id: basic provider_type: inline::basic - config: {} tool_runtime: - provider_id: rag-runtime provider_type: inline::rag-runtime - config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index ccddf11a2..f087e89ee 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -74,11 +74,9 @@ providers: scoring: - provider_id: basic provider_type: inline::basic - config: {} tool_runtime: - provider_id: rag-runtime provider_type: inline::rag-runtime - config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db diff --git a/llama_stack/templates/open-benchmark/build.yaml b/llama_stack/templates/open-benchmark/build.yaml index 5f82c5243..6647b471c 100644 --- a/llama_stack/templates/open-benchmark/build.yaml +++ b/llama_stack/templates/open-benchmark/build.yaml @@ -3,36 +3,58 @@ distribution_spec: description: Distribution for running open benchmarks providers: inference: - - remote::openai - - remote::anthropic - - remote::gemini - - remote::groq - - remote::together + - provider_id: openai + provider_type: remote::openai + - provider_id: anthropic + provider_type: remote::anthropic + - provider_id: gemini + provider_type: remote::gemini + - provider_id: groq + provider_type: remote::groq + - provider_id: together + provider_type: remote::together vector_io: - - inline::sqlite-vec - - remote::chromadb - - remote::pgvector + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + - provider_id: chromadb + provider_type: remote::chromadb + - provider_id: pgvector + provider_type: remote::pgvector safety: - - inline::llama-guard + - provider_id: llama-guard + provider_type: inline::llama-guard agents: - - inline::meta-reference + - provider_id: meta-reference + provider_type: inline::meta-reference telemetry: - - inline::meta-reference + - provider_id: meta-reference + provider_type: inline::meta-reference eval: - - inline::meta-reference + - provider_id: meta-reference + provider_type: 
inline::meta-reference datasetio: - - remote::huggingface - - inline::localfs + - provider_id: huggingface + provider_type: remote::huggingface + - provider_id: localfs + provider_type: inline::localfs scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime - - remote::model-context-protocol + - provider_id: brave-search + provider_type: remote::brave-search + - provider_id: tavily-search + provider_type: remote::tavily-search + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol image_type: conda +image_name: open-benchmark additional_pip_packages: - aiosqlite - sqlalchemy[asyncio] diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py index ae25c9fc9..3a17e7525 100644 --- a/llama_stack/templates/open-benchmark/open_benchmark.py +++ b/llama_stack/templates/open-benchmark/open_benchmark.py @@ -96,19 +96,33 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo def get_distribution_template() -> DistributionTemplate: inference_providers, available_models = get_inference_providers() providers = { - "inference": [p.provider_type for p in inference_providers], - "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "inference": inference_providers, + "vector_io": [ + Provider(provider_id="sqlite-vec", provider_type="inline::sqlite-vec"), + Provider(provider_id="chromadb", provider_type="remote::chromadb"), + Provider(provider_id="pgvector", provider_type="remote::pgvector"), + ], + "safety": [Provider(provider_id="llama-guard", provider_type="inline::llama-guard")], + "agents": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")], + "telemetry": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")], + "eval": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")], + "datasetio": [ + Provider(provider_id="huggingface", provider_type="remote::huggingface"), + Provider(provider_id="localfs", provider_type="inline::localfs"), + ], + "scoring": [ + Provider(provider_id="basic", provider_type="inline::basic"), + Provider(provider_id="llm-as-judge", provider_type="inline::llm-as-judge"), + Provider(provider_id="braintrust", provider_type="inline::braintrust"), + ], "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - "remote::model-context-protocol", + Provider(provider_id="brave-search", provider_type="remote::brave-search"), + Provider(provider_id="tavily-search", provider_type="remote::tavily-search"), + Provider(provider_id="rag-runtime", provider_type="inline::rag-runtime"), + Provider( + provider_id="model-context-protocol", + provider_type="remote::model-context-protocol", + ), ], } name = "open-benchmark" diff --git a/llama_stack/templates/open-benchmark/run.yaml 
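
[note] Because the map now holds Provider objects, open_benchmark.py can pass the fully-configured providers from get_inference_providers() straight through ("inference": inference_providers) instead of flattening them to type strings and rebuilding them later: the same Provider instance carries identity for build.yaml and config for run.yaml. Roughly (field values here are illustrative, not taken from the diff):

    p = Provider(
        provider_id="openai",
        provider_type="remote::openai",
        config={"api_key": "${env.OPENAI_API_KEY:=}"},  # example config
    )
    # build.yaml needs only the identity half of this object...
    build_entry = {"provider_id": p.provider_id, "provider_type": p.provider_type}
    # ...while run.yaml serializes the whole thing, config included.
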
diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml
index 828b960a2..ba6a5e9d6 100644
--- a/llama_stack/templates/open-benchmark/run.yaml
+++ b/llama_stack/templates/open-benchmark/run.yaml
@@ -106,10 +106,8 @@ providers:
   scoring:
   - provider_id: basic
     provider_type: inline::basic
-    config: {}
   - provider_id: llm-as-judge
     provider_type: inline::llm-as-judge
-    config: {}
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
@@ -127,10 +125,8 @@ providers:
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
-    config: {}
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
-    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db
diff --git a/llama_stack/templates/postgres-demo/build.yaml b/llama_stack/templates/postgres-demo/build.yaml
index 645b59613..d5e816a54 100644
--- a/llama_stack/templates/postgres-demo/build.yaml
+++ b/llama_stack/templates/postgres-demo/build.yaml
@@ -3,22 +3,33 @@ distribution_spec:
   description: Quick start template for running Llama Stack with several popular providers
   providers:
     inference:
-    - remote::vllm
-    - inline::sentence-transformers
+    - provider_id: vllm-inference
+      provider_type: remote::vllm
+    - provider_id: sentence-transformers
+      provider_type: inline::sentence-transformers
     vector_io:
-    - remote::chromadb
+    - provider_id: chromadb
+      provider_type: remote::chromadb
     safety:
-    - inline::llama-guard
+    - provider_id: llama-guard
+      provider_type: inline::llama-guard
     agents:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     telemetry:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     tool_runtime:
-    - remote::brave-search
-    - remote::tavily-search
-    - inline::rag-runtime
-    - remote::model-context-protocol
+    - provider_id: brave-search
+      provider_type: remote::brave-search
+    - provider_id: tavily-search
+      provider_type: remote::tavily-search
+    - provider_id: rag-runtime
+      provider_type: inline::rag-runtime
+    - provider_id: model-context-protocol
+      provider_type: remote::model-context-protocol
 image_type: conda
+image_name: postgres-demo
 additional_pip_packages:
 - asyncpg
 - psycopg2-binary
diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py
index c7ab222ec..24e3f6f27 100644
--- a/llama_stack/templates/postgres-demo/postgres_demo.py
+++ b/llama_stack/templates/postgres-demo/postgres_demo.py
@@ -34,16 +34,24 @@ def get_distribution_template() -> DistributionTemplate:
         ),
     ]
     providers = {
-        "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
-        "vector_io": ["remote::chromadb"],
-        "safety": ["inline::llama-guard"],
-        "agents": ["inline::meta-reference"],
-        "telemetry": ["inline::meta-reference"],
+        "inference": inference_providers
+        + [
+            Provider(provider_id="sentence-transformers", provider_type="inline::sentence-transformers"),
+        ],
+        "vector_io": [
+            Provider(provider_id="chromadb", provider_type="remote::chromadb"),
+        ],
+        "safety": [Provider(provider_id="llama-guard", provider_type="inline::llama-guard")],
+        "agents": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
+        "telemetry": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
         "tool_runtime": [
-            "remote::brave-search",
-            "remote::tavily-search",
-            "inline::rag-runtime",
-            "remote::model-context-protocol",
+            Provider(provider_id="brave-search", provider_type="remote::brave-search"),
+            Provider(provider_id="tavily-search", provider_type="remote::tavily-search"),
+            Provider(provider_id="rag-runtime", provider_type="inline::rag-runtime"),
+            Provider(
+                provider_id="model-context-protocol",
+                provider_type="remote::model-context-protocol",
+            ),
         ],
     }
     name = "postgres-demo"
diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml
index feb85e316..747b7dc53 100644
--- a/llama_stack/templates/postgres-demo/run.yaml
+++ b/llama_stack/templates/postgres-demo/run.yaml
@@ -18,7 +18,6 @@ providers:
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
   - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
     provider_type: remote::chromadb
@@ -70,10 +69,8 @@ providers:
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
-    config: {}
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
-    config: {}
 metadata_store:
   type: postgres
   host: ${env.POSTGRES_HOST:=localhost}
diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml
index 8180124f6..9b540ab62 100644
--- a/llama_stack/templates/starter/build.yaml
+++ b/llama_stack/templates/starter/build.yaml
@@ -3,57 +3,98 @@ distribution_spec:
   description: Quick start template for running Llama Stack with several popular providers
   providers:
     inference:
-    - remote::cerebras
-    - remote::ollama
-    - remote::vllm
-    - remote::tgi
-    - remote::hf::serverless
-    - remote::hf::endpoint
-    - remote::fireworks
-    - remote::together
-    - remote::bedrock
-    - remote::databricks
-    - remote::nvidia
-    - remote::runpod
-    - remote::openai
-    - remote::anthropic
-    - remote::gemini
-    - remote::groq
-    - remote::llama-openai-compat
-    - remote::sambanova
-    - remote::passthrough
-    - inline::sentence-transformers
+    - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__}
+      provider_type: remote::cerebras
+    - provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
+      provider_type: remote::ollama
+    - provider_id: ${env.ENABLE_VLLM:=__disabled__}
+      provider_type: remote::vllm
+    - provider_id: ${env.ENABLE_TGI:=__disabled__}
+      provider_type: remote::tgi
+    - provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__}
+      provider_type: remote::hf::serverless
+    - provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__}
+      provider_type: remote::hf::endpoint
+    - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__}
+      provider_type: remote::fireworks
+    - provider_id: ${env.ENABLE_TOGETHER:=__disabled__}
+      provider_type: remote::together
+    - provider_id: ${env.ENABLE_BEDROCK:=__disabled__}
+      provider_type: remote::bedrock
+    - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__}
+      provider_type: remote::databricks
+    - provider_id: ${env.ENABLE_NVIDIA:=__disabled__}
+      provider_type: remote::nvidia
+    - provider_id: ${env.ENABLE_RUNPOD:=__disabled__}
+      provider_type: remote::runpod
+    - provider_id: ${env.ENABLE_OPENAI:=__disabled__}
+      provider_type: remote::openai
+    - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__}
+      provider_type: remote::anthropic
+    - provider_id: ${env.ENABLE_GEMINI:=__disabled__}
+      provider_type: remote::gemini
+    - provider_id: ${env.ENABLE_GROQ:=__disabled__}
+      provider_type: remote::groq
+    - provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__}
+      provider_type: remote::llama-openai-compat
+    - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__}
+      provider_type: remote::sambanova
+    - provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__}
+      provider_type: remote::passthrough
+    - provider_id: sentence-transformers
+      provider_type: inline::sentence-transformers
     vector_io:
-    - inline::faiss
-    - inline::sqlite-vec
-    - inline::milvus
-    - remote::chromadb
-    - remote::pgvector
+    - provider_id: ${env.ENABLE_FAISS:=faiss}
+      provider_type: inline::faiss
+    - provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__}
+      provider_type: inline::sqlite-vec
+    - provider_id: ${env.ENABLE_MILVUS:=__disabled__}
+      provider_type: inline::milvus
+    - provider_id: ${env.ENABLE_CHROMADB:=__disabled__}
+      provider_type: remote::chromadb
+    - provider_id: ${env.ENABLE_PGVECTOR:=__disabled__}
+      provider_type: remote::pgvector
     files:
-    - inline::localfs
+    - provider_id: localfs
+      provider_type: inline::localfs
     safety:
-    - inline::llama-guard
+    - provider_id: llama-guard
+      provider_type: inline::llama-guard
     agents:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     telemetry:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     post_training:
-    - inline::huggingface
+    - provider_id: huggingface
+      provider_type: inline::huggingface
     eval:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     datasetio:
-    - remote::huggingface
-    - inline::localfs
+    - provider_id: huggingface
+      provider_type: remote::huggingface
+    - provider_id: localfs
+      provider_type: inline::localfs
     scoring:
-    - inline::basic
-    - inline::llm-as-judge
-    - inline::braintrust
+    - provider_id: basic
+      provider_type: inline::basic
+    - provider_id: llm-as-judge
+      provider_type: inline::llm-as-judge
+    - provider_id: braintrust
+      provider_type: inline::braintrust
     tool_runtime:
-    - remote::brave-search
-    - remote::tavily-search
-    - inline::rag-runtime
-    - remote::model-context-protocol
+    - provider_id: brave-search
+      provider_type: remote::brave-search
+    - provider_id: tavily-search
+      provider_type: remote::tavily-search
+    - provider_id: rag-runtime
+      provider_type: inline::rag-runtime
+    - provider_id: model-context-protocol
+      provider_type: remote::model-context-protocol
 image_type: conda
+image_name: starter
 additional_pip_packages:
 - aiosqlite
 - asyncpg
diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml
index c38933f98..d60800ebb 100644
--- a/llama_stack/templates/starter/run.yaml
+++ b/llama_stack/templates/starter/run.yaml
@@ -56,7 +56,6 @@ providers:
       api_key: ${env.TOGETHER_API_KEY}
   - provider_id: ${env.ENABLE_BEDROCK:=__disabled__}
     provider_type: remote::bedrock
-    config: {}
   - provider_id: ${env.ENABLE_DATABRICKS:=__disabled__}
     provider_type: remote::databricks
     config:
@@ -107,7 +106,6 @@ providers:
       api_key: ${env.PASSTHROUGH_API_KEY}
   - provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers}
     provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
   - provider_id: ${env.ENABLE_FAISS:=faiss}
     provider_type: inline::faiss
@@ -208,10 +206,8 @@ providers:
   scoring:
   - provider_id: basic
     provider_type: inline::basic
-    config: {}
   - provider_id: llm-as-judge
     provider_type: inline::llm-as-judge
-    config: {}
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
@@ -229,10 +225,8 @@ providers:
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
-    config: {}
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
-    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db
diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py
index cee1094db..489117702 100644
--- a/llama_stack/templates/starter/starter.py
+++ b/llama_stack/templates/starter/starter.py
@@ -253,21 +253,91 @@ def get_distribution_template() -> DistributionTemplate:
     ]
 
     providers = {
-        "inference": ([p.provider_type for p in remote_inference_providers] + ["inline::sentence-transformers"]),
-        "vector_io": ([p.provider_type for p in vector_io_providers]),
-        "files": ["inline::localfs"],
-        "safety": ["inline::llama-guard"],
-        "agents": ["inline::meta-reference"],
-        "telemetry": ["inline::meta-reference"],
-        "post_training": ["inline::huggingface"],
-        "eval": ["inline::meta-reference"],
-        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "inference": remote_inference_providers
+        + [
+            Provider(
+                provider_id="sentence-transformers",
+                provider_type="inline::sentence-transformers",
+            )
+        ],
+        "vector_io": vector_io_providers,
+        "files": [
+            Provider(
+                provider_id="localfs",
+                provider_type="inline::localfs",
+            )
+        ],
+        "safety": [
+            Provider(
+                provider_id="llama-guard",
+                provider_type="inline::llama-guard",
+            )
+        ],
+        "agents": [
+            Provider(
+                provider_id="meta-reference",
+                provider_type="inline::meta-reference",
+            )
+        ],
+        "telemetry": [
+            Provider(
+                provider_id="meta-reference",
+                provider_type="inline::meta-reference",
+            )
+        ],
+        "post_training": [
+            Provider(
+                provider_id="huggingface",
+                provider_type="inline::huggingface",
+            )
+        ],
+        "eval": [
+            Provider(
+                provider_id="meta-reference",
+                provider_type="inline::meta-reference",
+            )
+        ],
+        "datasetio": [
+            Provider(
+                provider_id="huggingface",
+                provider_type="remote::huggingface",
+            ),
+            Provider(
+                provider_id="localfs",
+                provider_type="inline::localfs",
+            ),
+        ],
+        "scoring": [
+            Provider(
+                provider_id="basic",
+                provider_type="inline::basic",
+            ),
+            Provider(
+                provider_id="llm-as-judge",
+                provider_type="inline::llm-as-judge",
+            ),
+            Provider(
+                provider_id="braintrust",
+                provider_type="inline::braintrust",
+            ),
+        ],
         "tool_runtime": [
-            "remote::brave-search",
-            "remote::tavily-search",
-            "inline::rag-runtime",
-            "remote::model-context-protocol",
+            Provider(
+                provider_id="brave-search",
+                provider_type="remote::brave-search",
+            ),
+            Provider(
+                provider_id="tavily-search",
+                provider_type="remote::tavily-search",
+            ),
+            Provider(
+                provider_id="rag-runtime",
+                provider_type="inline::rag-runtime",
+            ),
+            Provider(
+                provider_id="model-context-protocol",
+                provider_type="remote::model-context-protocol",
+            ),
         ],
     }
 
     files_provider = Provider(
diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py
index fb2528873..e9054f95d 100644
--- a/llama_stack/templates/template.py
+++ b/llama_stack/templates/template.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 from pathlib import Path
-from typing import Literal
+from typing import Any, Literal
 
 import jinja2
 import rich
@@ -35,6 +35,51 @@ from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
 
 
+def filter_empty_values(obj: Any) -> Any:
+    """Recursively filter out specific empty values from a dictionary or list.
+
+    This function removes:
+    - Empty strings ('') only when they are the 'module' field
+    - Empty dictionaries ({}) only when they are the 'config' field
+    - Empty 'container_image' values
+    - None items from lists
+    """
+    if obj is None:
+        return None
+
+    if isinstance(obj, dict):
+        filtered = {}
+        for key, value in obj.items():
+            # Special handling for specific fields
+            if key == "module" and isinstance(value, str) and value == "":
+                # Skip empty module strings
+                continue
+            elif key == "config" and isinstance(value, dict) and not value:
+                # Skip empty config dictionaries
+                continue
+            elif key == "container_image" and not value:
+                # Skip empty container_image names
+                continue
+            else:
+                # For all other fields, recursively filter but preserve empty values
+                filtered[key] = filter_empty_values(value)
+        return filtered
+
+    elif isinstance(obj, list):
+        filtered = []
+        for item in obj:
+            filtered_item = filter_empty_values(item)
+            if filtered_item is not None:
+                filtered.append(filtered_item)
+        return filtered
+
+    else:
+        # For all other types (including empty strings and dicts that aren't module/config),
+        # preserve them as-is
+        return obj
+
+
 def get_model_registry(
     available_models: dict[str, list[ProviderModelEntry]],
 ) -> tuple[list[ModelInput], bool]:
@@ -138,31 +183,26 @@ class RunConfigSettings(BaseModel):
     def run_config(
         self,
         name: str,
-        providers: dict[str, list[str]],
+        providers: dict[str, list[Provider]],
         container_image: str | None = None,
     ) -> dict:
         provider_registry = get_provider_registry()
-
         provider_configs = {}
-        for api_str, provider_types in providers.items():
+        for api_str, provider_objs in providers.items():
             if api_providers := self.provider_overrides.get(api_str):
                 # Convert Provider objects to dicts for YAML serialization
-                provider_configs[api_str] = [
-                    p.model_dump(exclude_none=True) if isinstance(p, Provider) else p for p in api_providers
-                ]
+                provider_configs[api_str] = [p.model_dump(exclude_none=True) for p in api_providers]
                 continue
 
             provider_configs[api_str] = []
-            for provider_type in provider_types:
-                provider_id = provider_type.split("::")[-1]
-
+            for provider in provider_objs:
                 api = Api(api_str)
-                if provider_type not in provider_registry[api]:
-                    raise ValueError(f"Unknown provider type: {provider_type} for API: {api_str}")
+                if provider.provider_type not in provider_registry[api]:
+                    raise ValueError(f"Unknown provider type: {provider.provider_type} for API: {api_str}")
 
-                config_class = provider_registry[api][provider_type].config_class
+                config_class = provider_registry[api][provider.provider_type].config_class
                 assert config_class is not None, (
-                    f"No config class for provider type: {provider_type} for API: {api_str}"
+                    f"No config class for provider type: {provider.provider_type} for API: {api_str}"
                 )
 
                 config_class = instantiate_class_type(config_class)
@@ -171,14 +211,9 @@ class RunConfigSettings(BaseModel):
             else:
                 config = {}
 
-            provider_configs[api_str].append(
-                Provider(
-                    provider_id=provider_id,
-                    provider_type=provider_type,
-                    config=config,
-                ).model_dump(exclude_none=True)
-            )
-
+            provider.config = config
+            # Convert Provider object to dict for YAML serialization
+            provider_configs[api_str].append(provider.model_dump(exclude_none=True))
         # Get unique set of APIs from providers
         apis = sorted(providers.keys())
@@ -222,7 +257,7 @@ class DistributionTemplate(BaseModel):
     description: str
     distro_type: Literal["self_hosted", "remote_hosted", "ondevice"]
 
-    providers: dict[str, list[str]]
+    providers: dict[str, list[Provider]]
     run_configs: dict[str, RunConfigSettings]
     template_path: Path | None = None
@@ -255,13 +290,28 @@ class DistributionTemplate(BaseModel):
         if self.additional_pip_packages:
             additional_pip_packages.extend(self.additional_pip_packages)
 
+        # Create minimal providers for build config (without runtime configs)
+        build_providers = {}
+        for api, providers in self.providers.items():
+            build_providers[api] = []
+            for provider in providers:
+                # Create a minimal provider object with only essential build information
+                build_provider = Provider(
+                    provider_id=provider.provider_id,
+                    provider_type=provider.provider_type,
+                    config={},  # Empty config for build
+                    module=provider.module,
+                )
+                build_providers[api].append(build_provider)
+
         return BuildConfig(
             distribution_spec=DistributionSpec(
                 description=self.description,
                 container_image=self.container_image,
-                providers=self.providers,
+                providers=build_providers,
             ),
-            image_type="conda",  # default to conda, can be overridden
+            image_type="conda",
+            image_name=self.name,
             additional_pip_packages=sorted(set(additional_pip_packages)),
         )
@@ -270,7 +320,7 @@ class DistributionTemplate(BaseModel):
         providers_table += "|-----|-------------|\n"
 
         for api, providers in sorted(self.providers.items()):
-            providers_str = ", ".join(f"`{p}`" for p in providers)
+            providers_str = ", ".join(f"`{p.provider_type}`" for p in providers)
             providers_table += f"| {api} | {providers_str} |\n"
 
         template = self.template_path.read_text()
@@ -334,7 +384,7 @@ class DistributionTemplate(BaseModel):
         build_config = self.build_config()
         with open(yaml_output_dir / "build.yaml", "w") as f:
             yaml.safe_dump(
-                build_config.model_dump(exclude_none=True),
+                filter_empty_values(build_config.model_dump(exclude_none=True)),
                 f,
                 sort_keys=False,
             )
@@ -343,7 +393,7 @@ class DistributionTemplate(BaseModel):
             run_config = settings.run_config(self.name, self.providers, self.container_image)
             with open(yaml_output_dir / yaml_pth, "w") as f:
                 yaml.safe_dump(
-                    {k: v for k, v in run_config.items() if v is not None},
+                    filter_empty_values(run_config),
                    f,
                     sort_keys=False,
                 )
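
[note] filter_empty_values above is what lets the generator drop all the noisy `config: {}` lines removed from the run.yaml files in this diff, while leaving meaningful values alone. Expected behavior on a serialized provider entry (values invented for illustration):

    entry = {
        "provider_id": "rag-runtime",
        "provider_type": "inline::rag-runtime",
        "config": {},  # dropped: empty config dict
        "module": "",  # dropped: empty module string
    }
    assert filter_empty_values(entry) == {
        "provider_id": "rag-runtime",
        "provider_type": "inline::rag-runtime",
    }
    # A populated config survives untouched:
    assert filter_empty_values({"config": {"url": "http://localhost:8000"}}) == {
        "config": {"url": "http://localhost:8000"}
    }
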
diff --git a/llama_stack/templates/watsonx/build.yaml b/llama_stack/templates/watsonx/build.yaml
index 08ee2c5ce..bc992f0c7 100644
--- a/llama_stack/templates/watsonx/build.yaml
+++ b/llama_stack/templates/watsonx/build.yaml
@@ -3,31 +3,48 @@ distribution_spec:
   description: Use watsonx for running LLM inference
   providers:
     inference:
-    - remote::watsonx
-    - inline::sentence-transformers
+    - provider_id: watsonx
+      provider_type: remote::watsonx
+    - provider_id: sentence-transformers
+      provider_type: inline::sentence-transformers
     vector_io:
-    - inline::faiss
+    - provider_id: faiss
+      provider_type: inline::faiss
     safety:
-    - inline::llama-guard
+    - provider_id: llama-guard
+      provider_type: inline::llama-guard
     agents:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     telemetry:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     eval:
-    - inline::meta-reference
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
     datasetio:
-    - remote::huggingface
-    - inline::localfs
+    - provider_id: huggingface
+      provider_type: remote::huggingface
+    - provider_id: localfs
+      provider_type: inline::localfs
     scoring:
-    - inline::basic
-    - inline::llm-as-judge
-    - inline::braintrust
+    - provider_id: basic
+      provider_type: inline::basic
+    - provider_id: llm-as-judge
+      provider_type: inline::llm-as-judge
+    - provider_id: braintrust
+      provider_type: inline::braintrust
     tool_runtime:
-    - remote::brave-search
-    - remote::tavily-search
-    - inline::rag-runtime
-    - remote::model-context-protocol
+    - provider_id: brave-search
+      provider_type: remote::brave-search
+    - provider_id: tavily-search
+      provider_type: remote::tavily-search
+    - provider_id: rag-runtime
+      provider_type: inline::rag-runtime
+    - provider_id: model-context-protocol
+      provider_type: remote::model-context-protocol
 image_type: conda
+image_name: watsonx
 additional_pip_packages:
-- aiosqlite
 - sqlalchemy[asyncio]
+- aiosqlite
diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml
index afbbdb917..f5fe31bef 100644
--- a/llama_stack/templates/watsonx/run.yaml
+++ b/llama_stack/templates/watsonx/run.yaml
@@ -20,7 +20,6 @@ providers:
       project_id: ${env.WATSONX_PROJECT_ID:=}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
-    config: {}
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -74,10 +73,8 @@ providers:
   scoring:
   - provider_id: basic
     provider_type: inline::basic
-    config: {}
   - provider_id: llm-as-judge
     provider_type: inline::llm-as-judge
-    config: {}
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
@@ -95,10 +92,8 @@ providers:
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
-    config: {}
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
-    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db
diff --git a/llama_stack/templates/watsonx/watsonx.py b/llama_stack/templates/watsonx/watsonx.py
index ea185f05d..c13bbea36 100644
--- a/llama_stack/templates/watsonx/watsonx.py
+++ b/llama_stack/templates/watsonx/watsonx.py
@@ -18,19 +18,87 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["remote::watsonx", "inline::sentence-transformers"],
-        "vector_io": ["inline::faiss"],
-        "safety": ["inline::llama-guard"],
-        "agents": ["inline::meta-reference"],
-        "telemetry": ["inline::meta-reference"],
-        "eval": ["inline::meta-reference"],
-        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "inference": [
+            Provider(
+                provider_id="watsonx",
+                provider_type="remote::watsonx",
+            ),
+            Provider(
+                provider_id="sentence-transformers",
+                provider_type="inline::sentence-transformers",
+            ),
+        ],
+        "vector_io": [
+            Provider(
+                provider_id="faiss",
+                provider_type="inline::faiss",
+            )
+        ],
+        "safety": [
+            Provider(
+                provider_id="llama-guard",
+                provider_type="inline::llama-guard",
+            )
+        ],
+        "agents": [
+            Provider(
+                provider_id="meta-reference",
+                provider_type="inline::meta-reference",
+            )
+        ],
+        "telemetry": [
+            Provider(
+                provider_id="meta-reference",
+                provider_type="inline::meta-reference",
+            )
+        ],
+        "eval": [
+            Provider(
+                provider_id="meta-reference",
+                provider_type="inline::meta-reference",
+            )
+        ],
+        "datasetio": [
+            Provider(
+                provider_id="huggingface",
+                provider_type="remote::huggingface",
+            ),
+            Provider(
+                provider_id="localfs",
+                provider_type="inline::localfs",
+            ),
+        ],
+        "scoring": [
+            Provider(
+                provider_id="basic",
+                provider_type="inline::basic",
+            ),
+            Provider(
+                provider_id="llm-as-judge",
+                provider_type="inline::llm-as-judge",
+            ),
+            Provider(
+                provider_id="braintrust",
+                provider_type="inline::braintrust",
+            ),
+        ],
         "tool_runtime": [
-            "remote::brave-search",
-            "remote::tavily-search",
-            "inline::rag-runtime",
-            "remote::model-context-protocol",
+            Provider(
+                provider_id="brave-search",
+                provider_type="remote::brave-search",
+            ),
+            Provider(
+                provider_id="tavily-search",
+                provider_type="remote::tavily-search",
+            ),
+            Provider(
+                provider_id="rag-runtime",
+                provider_type="inline::rag-runtime",
+            ),
+            Provider(
+                provider_id="model-context-protocol",
+                provider_type="remote::model-context-protocol",
+            ),
         ],
     }
diff --git a/tests/external/build.yaml b/tests/external/build.yaml
index 90dcc97aa..c928febdb 100644
--- a/tests/external/build.yaml
+++ b/tests/external/build.yaml
@@ -3,7 +3,8 @@ distribution_spec:
   description: Custom distro for CI tests
   providers:
     weather:
-    - remote::kaze
+    - provider_id: kaze
+      provider_type: remote::kaze
 image_type: venv
 image_name: ci-test
 external_providers_dir: ~/.llama/providers.d