diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml index e9748721a..f0b190d6c 100644 --- a/llama_stack/templates/nvidia/build.yaml +++ b/llama_stack/templates/nvidia/build.yaml @@ -7,7 +7,7 @@ distribution_spec: vector_io: - inline::faiss safety: - - inline::llama-guard + - remote::nvidia agents: - inline::meta-reference telemetry: @@ -15,16 +15,9 @@ distribution_spec: eval: - inline::meta-reference datasetio: - - remote::huggingface - inline::localfs scoring: - inline::basic - - inline::llm-as-judge - - inline::braintrust tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::code-interpreter - inline::rag-runtime - - remote::model-context-protocol image_type: conda diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index 6636978db..1181706e1 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -79,7 +79,7 @@ def get_distribution_template() -> DistributionTemplate: return DistributionTemplate( name="nvidia", distro_type="remote_hosted", - description="Use NVIDIA NIM for running LLM inference", + description="Use NVIDIA NIM for running LLM inference and safety", container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, @@ -100,7 +100,7 @@ def get_distribution_template() -> DistributionTemplate: ] }, default_models=[inference_model, safety_model], - default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], + default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")], default_tool_groups=default_tool_groups, ), }, diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index ec52e3ef1..e9105062e 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -31,8 +31,8 @@ providers: namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db safety: - - provider_id: llama-guard - provider_type: inline::llama-guard + - provider_id: nvidia + provider_type: remote::nvidia config: {} agents: - provider_id: meta-reference @@ -54,9 +54,6 @@ providers: provider_type: inline::meta-reference config: {} datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: {} - provider_id: localfs provider_type: inline::localfs config: {} @@ -64,33 +61,10 @@ providers: - provider_id: basic provider_type: inline::basic config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:} tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} - max_results: 3 - - provider_id: code-interpreter - provider_type: inline::code-interpreter - config: {} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db @@ -105,16 +79,13 @@ models: model_type: llm shields: - shield_id: ${env.SAFETY_MODEL} + provider_id: nvidia vector_dbs: [] datasets: [] scoring_fns: [] benchmarks: [] tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search - toolgroup_id: builtin::rag provider_id: rag-runtime -- toolgroup_id: builtin::code_interpreter - provider_id: code-interpreter server: port: 8321 diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 7cf527c04..4ce64cf59 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -110,8 +110,7 @@ models: - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 - provider_id: ollama - provider_model_id: all-minilm:latest + provider_id: sentence-transformers model_type: embedding shields: - shield_id: ${env.SAFETY_MODEL} diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index ab292c5e0..b4982f8e2 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -103,8 +103,7 @@ models: - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 - provider_id: ollama - provider_model_id: all-minilm:latest + provider_id: sentence-transformers model_type: embedding shields: [] vector_dbs: []