diff --git a/distributions/dependencies.json b/distributions/dependencies.json
index 9e468f08d..85b0a134b 100644
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@@ -288,16 +288,13 @@
   ],
   "nvidia": [
     "aiosqlite",
-    "autoevals",
     "blobfile",
     "chardet",
-    "datasets",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
     "matplotlib",
-    "mcp",
     "nltk",
     "numpy",
     "openai",
@@ -309,7 +306,6 @@
     "pymongo",
     "pypdf",
     "redis",
-    "requests",
     "scikit-learn",
     "scipy",
     "sentencepiece",
diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md
index 2d6e85260..fcc61e866 100644
--- a/docs/source/distributions/remote_hosted_distro/nvidia.md
+++ b/docs/source/distributions/remote_hosted_distro/nvidia.md
@@ -6,13 +6,13 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov
 | API | Provider(s) |
 |-----|-------------|
 | agents | `inline::meta-reference` |
-| datasetio | `remote::huggingface`, `inline::localfs` |
+| datasetio | `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::nvidia` |
-| safety | `inline::llama-guard` |
-| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
+| safety | `remote::nvidia` |
+| scoring | `inline::basic` |
 | telemetry | `inline::meta-reference` |
-| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
+| tool_runtime | `inline::rag-runtime` |
 | vector_io | `inline::faiss` |
 
diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml
index f0b190d6c..0c788ce86 100644
--- a/llama_stack/templates/nvidia/build.yaml
+++ b/llama_stack/templates/nvidia/build.yaml
@@ -1,6 +1,6 @@
 version: '2'
 distribution_spec:
-  description: Use NVIDIA NIM for running LLM inference
+  description: Use NVIDIA NIM for running LLM inference and safety
   providers:
     inference:
     - remote::nvidia
diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py
index 98fd2747c..118953cc4 100644
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -18,19 +18,13 @@ def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::nvidia"],
         "vector_io": ["inline::faiss"],
-        "safety": ["inline::llama-guard"],
+        "safety": ["remote::nvidia"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
         "eval": ["inline::meta-reference"],
-        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
-        "tool_runtime": [
-            "remote::brave-search",
-            "remote::tavily-search",
-            "inline::code-interpreter",
-            "inline::rag-runtime",
-            "remote::model-context-protocol",
-        ],
+        "datasetio": ["inline::localfs"],
+        "scoring": ["inline::basic"],
+        "tool_runtime": ["inline::rag-runtime"],
     }
 
     inference_provider = Provider(
@@ -64,18 +58,10 @@
         for m in _MODEL_ENTRIES
     ]
     default_tool_groups = [
-        ToolGroupInput(
-            toolgroup_id="builtin::websearch",
-            provider_id="tavily-search",
-        ),
         ToolGroupInput(
             toolgroup_id="builtin::rag",
             provider_id="rag-runtime",
         ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
     ]
 
     return DistributionTemplate(
diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml
index e9105062e..68efa106e 100644
--- a/llama_stack/templates/nvidia/run-with-safety.yaml
+++ b/llama_stack/templates/nvidia/run-with-safety.yaml
@@ -33,7 +33,9 @@ providers:
   safety:
   - provider_id: nvidia
     provider_type: remote::nvidia
-    config: {}
+    config:
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
+      config_id: self-check
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
index 4c38ec24e..47995bd02 100644
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -26,9 +26,11 @@ providers:
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db
   safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
+      config_id: self-check
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -49,9 +51,6 @@
     provider_type: inline::meta-reference
     config: {}
   datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config: {}
   - provider_id: localfs
     provider_type: inline::localfs
     config: {}
@@ -59,33 +58,10 @@
   - provider_id: basic
     provider_type: inline::basic
     config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
   tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
@@ -148,11 +124,7 @@ datasets: []
 scoring_fns: []
 benchmarks: []
 tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
 - toolgroup_id: builtin::rag
   provider_id: rag-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
   port: 8321
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index 9f9a32d79..063840a50 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -97,7 +97,8 @@ models:
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
-  provider_id: sentence-transformers
+  provider_id: ollama
+  provider_model_id: all-minilm:latest
   model_type: embedding
 shields:
 - shield_id: ${env.SAFETY_MODEL}
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index d78f0838f..d64e07347 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -87,14 +87,6 @@ models:
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ollama
   model_type: llm
-<<<<<<< HEAD
-- metadata:
-    embedding_dimension: 384
-  model_id: all-MiniLM-L6-v2
-  provider_id: sentence-transformers
-  model_type: embedding
-=======
->>>>>>> upstream/main
 shields: []
 vector_dbs: []
 datasets: []
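
Quick smoke test for the `remote::nvidia` safety provider configured above. This is a minimal sketch, not part of the diff: it assumes a stack started from `run.yaml` on its default port 8321, a NeMo Guardrails service reachable at the `GUARDRAILS_SERVICE_URL` default (`http://localhost:7331`), and a hypothetical shield id `self-check` mirroring the provider's `config_id`; adjust all three for your deployment.

```python
# Sketch only -- exercises the remote::nvidia safety provider added in this
# diff. The base URL, port, and shield_id are assumptions, not values
# mandated by the change.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Register a shield backed by the NVIDIA guardrails provider; "self-check"
# mirrors the config_id default written into run.yaml above.
client.shields.register(shield_id="self-check", provider_id="nvidia")

# Run the shield over a user message; a policy violation, if any, is
# returned on the response rather than raised.
response = client.safety.run_shield(
    shield_id="self-check",
    messages=[{"role": "user", "content": "Tell me how to pick a lock."}],
    params={},
)
print(response.violation)
```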