feat: Add Nvidia e2e beginner notebook and tool calling notebook (#1964)

# What does this PR do?
This PR adds two notebook folders that serve as reference material for developers getting started with Llama Stack using the NVIDIA Provider. Developers should be able to execute these notebooks end-to-end against their own NeMo Microservices deployment.
1. `beginner_e2e/`: A beginner end-to-end workflow that covers creating datasets, running inference, customizing and evaluating models, and running safety checks.
2. `tool_calling/`: A port of the [Data Flywheel & Tool Calling notebook](https://github.com/NVIDIA/GenerativeAIExamples/tree/main/nemo/data-flywheel) referenced in the NeMo Microservices docs. I updated the notebook to use the Llama Stack client wherever possible and added relevant instructions; a minimal sketch of connecting that client follows this list.
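
For orientation, here is a minimal sketch of pointing the Llama Stack client at a running distribution; the base URL is a placeholder rather than a value from this PR:

```python
# Minimal sketch: connect the Llama Stack client to a running distribution.
# The base_url is a placeholder; substitute your own deployment's address.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# List the models the distribution exposes (e.g., NIM-served models).
for model in client.models.list():
    print(model.identifier)
```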

## Test Plan
- Both notebook folders contain READMEs with prerequisites. To manually test these notebooks, you'll need a deployment of the NeMo Microservices Platform and to update the `config.py` file with your deployment's information (a hypothetical sketch of such a config follows this list).
- I've run through these notebooks manually end-to-end to verify each step works.
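
As a rough illustration only (the variable names here are hypothetical, not taken from this PR's actual `config.py`), such a config might gather the deployment endpoints in one place:

```python
# Hypothetical deployment config -- the real config.py in this PR may use
# different names. Fill in your NeMo Microservices deployment's values.
NEMO_URL = "http://nemo.test"    # NeMo Microservices platform endpoint
NIM_URL = "http://nim.test"      # NIM inference endpoint
NVIDIA_API_KEY = "your-api-key"  # API key, if your deployment requires one
```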

---------

Co-authored-by: Jash Gulabrai <jgulabrai@nvidia.com>

@@ -32,7 +32,6 @@ import os
os.environ["NVIDIA_API_KEY"] = "your-api-key"
os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test"
os.environ["NVIDIA_USER_ID"] = "llama-stack-user"
os.environ["NVIDIA_DATASET_NAMESPACE"] = "default"
os.environ["NVIDIA_PROJECT_ID"] = "test-project"
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
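
The hunk above shows the environment the notebook prepares before importing the library client. Initialization then proceeds roughly like this (a sketch assuming the `nvidia` distribution template; the values are placeholders):

```python
import os

# Point the NVIDIA provider adapters at your deployment (placeholders).
os.environ["NVIDIA_API_KEY"] = "your-api-key"
os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test"

from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

# Run the "nvidia" distribution in-process instead of against a server.
client = LlamaStackAsLibraryClient("nvidia")
client.initialize()
```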

@@ -36,6 +36,10 @@ class NvidiaDatasetIOAdapter:
         url = f"{self.config.datasets_url}{path}"
         request_headers = self.headers.copy()
+        # Set default Content-Type for JSON requests
+        if json is not None:
+            request_headers["Content-Type"] = "application/json"
+
         if headers:
             request_headers.update(headers)
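
Standalone, the header-merging logic in this hunk amounts to the following pattern (a paraphrase for illustration, not the adapter's exact code):

```python
def build_headers(defaults: dict, json_body=None, overrides: dict | None = None) -> dict:
    # Copy so per-request changes never mutate the shared defaults.
    request_headers = defaults.copy()
    # Only requests carrying a JSON body get a default Content-Type.
    if json_body is not None:
        request_headers["Content-Type"] = "application/json"
    # Caller-supplied headers win over the defaults.
    if overrides:
        request_headers.update(overrides)
    return request_headers
```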

@@ -25,13 +25,16 @@ class NVIDIASafetyConfig(BaseModel):
     guardrails_service_url: str = Field(
         default_factory=lambda: os.getenv("GUARDRAILS_SERVICE_URL", "http://0.0.0.0:7331"),
-        description="The url for accessing the guardrails service",
+        description="The url for accessing the Guardrails service",
     )
-    config_id: str | None = Field(default="self-check", description="Config ID to use from the config store")
+    config_id: str | None = Field(
+        default_factory=lambda: os.getenv("NVIDIA_GUARDRAILS_CONFIG_ID", "self-check"),
+        description="Guardrails configuration ID to use from the Guardrails configuration store",
+    )
 
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
             "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}",
-            "config_id": "self-check",
+            "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}",
         }
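
Because the new `config_id` reads the environment in its `default_factory`, the Guardrails configuration can be swapped without code changes. A small sketch of the resulting behavior (the import path is an assumption and may differ by version):

```python
import os

# Assumed import path; adjust to your llama-stack version.
from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig

# With the env var unset, the field falls back to "self-check".
os.environ.pop("NVIDIA_GUARDRAILS_CONFIG_ID", None)
assert NVIDIASafetyConfig().config_id == "self-check"

# Setting it before instantiation selects a different Guardrails config.
os.environ["NVIDIA_GUARDRAILS_CONFIG_ID"] = "my-guardrails-config"
assert NVIDIASafetyConfig().config_id == "my-guardrails-config"
```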

@@ -144,3 +144,6 @@ llama stack run ./run.yaml \
   --env NVIDIA_API_KEY=$NVIDIA_API_KEY \
   --env INFERENCE_MODEL=$INFERENCE_MODEL
 ```
+
+## Example Notebooks
+For examples of how to use the NVIDIA Distribution to run inference, fine-tune, evaluate, and run safety checks on your LLMs, you can reference the example notebooks in `docs/notebooks/nvidia`.

@@ -130,6 +130,10 @@ def get_distribution_template() -> DistributionTemplate:
                 "http://0.0.0.0:7331",
                 "URL for the NeMo Guardrails Service",
             ),
+            "NVIDIA_GUARDRAILS_CONFIG_ID": (
+                "self-check",
+                "NVIDIA Guardrail Configuration ID",
+            ),
             "NVIDIA_EVALUATOR_URL": (
                 "http://0.0.0.0:7331",
                 "URL for the NeMo Evaluator Service",

@@ -23,7 +23,7 @@ providers:
     provider_type: remote::nvidia
     config:
       guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
-      config_id: self-check
+      config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -37,7 +37,7 @@ providers:
     provider_type: remote::nvidia
     config:
       guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
-      config_id: self-check
+      config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
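
The `${env.VAR:default}` placeholders above are resolved when the stack loads `run.yaml`; the substitution behaves roughly like this simplified sketch (not the stack's actual resolver):

```python
import os
import re

def resolve_env_placeholders(value: str) -> str:
    # Replace each ${env.NAME:default} with env var NAME, or the default.
    pattern = re.compile(r"\$\{env\.([A-Za-z0-9_]+):([^}]*)\}")
    return pattern.sub(lambda m: os.getenv(m.group(1), m.group(2)), value)

# With NVIDIA_GUARDRAILS_CONFIG_ID unset, the default wins:
print(resolve_env_placeholders("${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}"))
# -> self-check
```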

@@ -32,7 +32,7 @@ providers:
     provider_type: remote::nvidia
     config:
       guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
-      config_id: self-check
+      config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference