Clean up instructions and implementation; reorganize notebooks

2025-07-21 03:59:42 +00:00 · 2025-04-18 16:27:19 -04:00 · 2025-04-18 16:27:19 -04:00 · 4131e8146f
commit 4131e8146f
parent 0d9d333a4e
29 changed files with 2756 additions and 89 deletions
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -65,7 +65,7 @@ def get_distribution_template() -> DistributionTemplate:
    default_models = get_model_registry(available_models)
    return DistributionTemplate(
        name="nvidia",
-        distro_type="remote_hosted",
+        distro_type="self_hosted",
        description="Use NVIDIA NIM for running LLM inference, evaluation and safety",
        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
@@ -103,6 +103,10 @@ def get_distribution_template() -> DistributionTemplate:
                "llama-stack-user",
                "NVIDIA User ID",
            ),
+            "NVIDIA_APPEND_API_VERSION": (
+                "True",
+                "Whether to append the API version to the base_url",
+            ),
            "NVIDIA_DATASET_NAMESPACE": (
                "default",
                "NVIDIA Dataset Namespace",
@@ -127,6 +131,10 @@ def get_distribution_template() -> DistributionTemplate:
                "http://0.0.0.0:7331",
                "URL for the NeMo Guardrails Service",
            ),
+            "NVIDIA_GUARDRAILS_CONFIG_ID": (
+                "self-check",
+                "NVIDIA Guardrail Configuration ID",
+            ),
            "NVIDIA_EVALUATOR_URL": (
                "http://0.0.0.0:7331",
                "URL for the NeMo Evaluator Service",
--- a/llama_stack/templates/nvidia/run-with-safety.yaml
+++ b/llama_stack/templates/nvidia/run-with-safety.yaml
@@ -18,11 +18,12 @@ providers:
    config:
      url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}
      api_key: ${env.NVIDIA_API_KEY:}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True}
  - provider_id: nvidia
    provider_type: remote::nvidia
    config:
      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
-      config_id: self-check
+      config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
@@ -36,7 +37,7 @@ providers:
    provider_type: remote::nvidia
    config:
      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
-      config_id: self-check
+      config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -18,6 +18,7 @@ providers:
    config:
      url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}
      api_key: ${env.NVIDIA_API_KEY:}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
@@ -31,7 +32,7 @@ providers:
    provider_type: remote::nvidia
    config:
      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
-      config_id: self-check
+      config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference