From 7faec2380a53766148104375fc0ef32d82174954 Mon Sep 17 00:00:00 2001 From: Jash Gulabrai Date: Thu, 10 Apr 2025 10:58:11 -0400 Subject: [PATCH] Clear notebook output --- .../nvidia/Llama_Stack_NVIDIA_E2E_Flow.ipynb | 844 +----------------- 1 file changed, 32 insertions(+), 812 deletions(-) diff --git a/docs/notebooks/nvidia/Llama_Stack_NVIDIA_E2E_Flow.ipynb b/docs/notebooks/nvidia/Llama_Stack_NVIDIA_E2E_Flow.ipynb index b1c6dc46c..17d370ce3 100644 --- a/docs/notebooks/nvidia/Llama_Stack_NVIDIA_E2E_Flow.ipynb +++ b/docs/notebooks/nvidia/Llama_Stack_NVIDIA_E2E_Flow.ipynb @@ -115,18 +115,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jgulabrai/Projects/forks/llama-stack/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "import asyncio\n", "import json\n", @@ -155,540 +146,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Warning: `bwrap` is not available. Code interpreter tool will not work correctly.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Initializing NVIDIASafetyAdapter(https://nmp.int.aire.nvidia.com)...\n" - ] - }, - { - "data": { - "text/html": [ - "
Using config nvidia:\n",
-       "
\n" - ], - "text/plain": [ - "Using config \u001b[34mnvidia\u001b[0m:\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
apis:\n",
-       "- agents\n",
-       "- datasetio\n",
-       "- eval\n",
-       "- inference\n",
-       "- post_training\n",
-       "- safety\n",
-       "- scoring\n",
-       "- telemetry\n",
-       "- tool_runtime\n",
-       "- vector_io\n",
-       "benchmarks: []\n",
-       "container_image: null\n",
-       "datasets: []\n",
-       "image_name: nvidia\n",
-       "logging: null\n",
-       "metadata_store:\n",
-       "  db_path: /Users/jgulabrai/.llama/distributions/nvidia/registry.db\n",
-       "  namespace: null\n",
-       "  type: sqlite\n",
-       "models:\n",
-       "- metadata: {}\n",
-       "  model_id: meta/llama3-8b-instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama3-8b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta-llama/Llama-3-8B-Instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama3-8b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta/llama3-70b-instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama3-70b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta-llama/Llama-3-70B-Instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama3-70b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta/llama-3.1-8b-instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.1-8b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta-llama/Llama-3.1-8B-Instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.1-8b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta/llama-3.1-70b-instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.1-70b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta-llama/Llama-3.1-70B-Instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.1-70b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta/llama-3.1-405b-instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.1-405b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta-llama/Llama-3.1-405B-Instruct-FP8\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.1-405b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta/llama-3.2-1b-instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.2-1b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta-llama/Llama-3.2-1B-Instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.2-1b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta/llama-3.2-3b-instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.2-3b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta-llama/Llama-3.2-3B-Instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.2-3b-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta/llama-3.2-11b-vision-instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.2-11b-vision-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.2-11b-vision-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta/llama-3.2-90b-vision-instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.2-90b-vision-instruct\n",
-       "- metadata: {}\n",
-       "  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - llm\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: meta/llama-3.2-90b-vision-instruct\n",
-       "- metadata:\n",
-       "    context_length: 8192\n",
-       "    embedding_dimension: 2048\n",
-       "  model_id: nvidia/llama-3.2-nv-embedqa-1b-v2\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - embedding\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: nvidia/llama-3.2-nv-embedqa-1b-v2\n",
-       "- metadata:\n",
-       "    context_length: 512\n",
-       "    embedding_dimension: 1024\n",
-       "  model_id: nvidia/nv-embedqa-e5-v5\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - embedding\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: nvidia/nv-embedqa-e5-v5\n",
-       "- metadata:\n",
-       "    context_length: 512\n",
-       "    embedding_dimension: 4096\n",
-       "  model_id: nvidia/nv-embedqa-mistral-7b-v2\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - embedding\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: nvidia/nv-embedqa-mistral-7b-v2\n",
-       "- metadata:\n",
-       "    context_length: 512\n",
-       "    embedding_dimension: 1024\n",
-       "  model_id: snowflake/arctic-embed-l\n",
-       "  model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
-       "  - embedding\n",
-       "  provider_id: nvidia\n",
-       "  provider_model_id: snowflake/arctic-embed-l\n",
-       "providers:\n",
-       "  agents:\n",
-       "  - config:\n",
-       "      persistence_store:\n",
-       "        db_path: /Users/jgulabrai/.llama/distributions/nvidia/agents_store.db\n",
-       "        namespace: null\n",
-       "        type: sqlite\n",
-       "    provider_id: meta-reference\n",
-       "    provider_type: inline::meta-reference\n",
-       "  datasetio:\n",
-       "  - config:\n",
-       "      kvstore:\n",
-       "        db_path: /Users/jgulabrai/.llama/distributions/nvidia/localfs_datasetio.db\n",
-       "        namespace: null\n",
-       "        type: sqlite\n",
-       "    provider_id: localfs\n",
-       "    provider_type: inline::localfs\n",
-       "  eval:\n",
-       "  - config:\n",
-       "      evaluator_service_url: https://nmp.int.aire.nvidia.com\n",
-       "    provider_id: nvidia\n",
-       "    provider_type: remote::nvidia\n",
-       "  inference:\n",
-       "  - config:\n",
-       "      api_key: '********'\n",
-       "      url: https://nim.int.aire.nvidia.com\n",
-       "    provider_id: nvidia\n",
-       "    provider_type: remote::nvidia\n",
-       "  post_training:\n",
-       "  - config:\n",
-       "      api_key: '********'\n",
-       "      customizer_url: https://nmp.int.aire.nvidia.com\n",
-       "      dataset_namespace: default\n",
-       "      project_id: test-project\n",
-       "    provider_id: nvidia\n",
-       "    provider_type: remote::nvidia\n",
-       "  safety:\n",
-       "  - config:\n",
-       "      config_id: self-check\n",
-       "      guardrails_service_url: https://nmp.int.aire.nvidia.com\n",
-       "    provider_id: nvidia\n",
-       "    provider_type: remote::nvidia\n",
-       "  scoring:\n",
-       "  - config: {}\n",
-       "    provider_id: basic\n",
-       "    provider_type: inline::basic\n",
-       "  telemetry:\n",
-       "  - config:\n",
-       "      service_name: \"\\u200B\"\n",
-       "      sinks: sqlite\n",
-       "      sqlite_db_path: /Users/jgulabrai/.llama/distributions/nvidia/trace_store.db\n",
-       "    provider_id: meta-reference\n",
-       "    provider_type: inline::meta-reference\n",
-       "  tool_runtime:\n",
-       "  - config: {}\n",
-       "    provider_id: rag-runtime\n",
-       "    provider_type: inline::rag-runtime\n",
-       "  vector_io:\n",
-       "  - config:\n",
-       "      kvstore:\n",
-       "        db_path: /Users/jgulabrai/.llama/distributions/nvidia/faiss_store.db\n",
-       "        namespace: null\n",
-       "        type: sqlite\n",
-       "    provider_id: faiss\n",
-       "    provider_type: inline::faiss\n",
-       "scoring_fns: []\n",
-       "server:\n",
-       "  auth: null\n",
-       "  port: 8321\n",
-       "  tls_certfile: null\n",
-       "  tls_keyfile: null\n",
-       "shields: []\n",
-       "tool_groups:\n",
-       "- args: null\n",
-       "  mcp_endpoint: null\n",
-       "  provider_id: rag-runtime\n",
-       "  toolgroup_id: builtin::rag\n",
-       "vector_dbs: []\n",
-       "version: '2'\n",
-       "\n",
-       "
\n" - ], - "text/plain": [ - "apis:\n", - "- agents\n", - "- datasetio\n", - "- eval\n", - "- inference\n", - "- post_training\n", - "- safety\n", - "- scoring\n", - "- telemetry\n", - "- tool_runtime\n", - "- vector_io\n", - "benchmarks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "container_image: null\n", - "datasets: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "image_name: nvidia\n", - "logging: null\n", - "metadata_store:\n", - " db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mregistry.db\u001b[0m\n", - " namespace: null\n", - " type: sqlite\n", - "models:\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta/llama3-8b-instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama3-8b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta-llama/Llama-\u001b[1;36m3\u001b[0m-8B-Instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama3-8b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta/llama3-70b-instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama3-70b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta-llama/Llama-\u001b[1;36m3\u001b[0m-70B-Instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama3-70b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-8b-instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-8b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-8b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-70b-instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-70b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-70b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-405b-instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-405b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-FP8\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-405b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-1b-instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-1b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-1B-Instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-1b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-3b-instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-3b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-3b-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-11b-vision-instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-11b-vision-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-11b-vision-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-90b-vision-instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-90b-vision-instruct\n", - "- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - llm\n", - " provider_id: nvidia\n", - " provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-90b-vision-instruct\n", - "- metadata:\n", - " context_length: \u001b[1;36m8192\u001b[0m\n", - " embedding_dimension: \u001b[1;36m2048\u001b[0m\n", - " model_id: nvidia/llama-\u001b[1;36m3.2\u001b[0m-nv-embedqa-1b-v2\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - embedding\n", - " provider_id: nvidia\n", - " provider_model_id: nvidia/llama-\u001b[1;36m3.2\u001b[0m-nv-embedqa-1b-v2\n", - "- metadata:\n", - " context_length: \u001b[1;36m512\u001b[0m\n", - " embedding_dimension: \u001b[1;36m1024\u001b[0m\n", - " model_id: nvidia/nv-embedqa-e5-v5\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - embedding\n", - " provider_id: nvidia\n", - " provider_model_id: nvidia/nv-embedqa-e5-v5\n", - "- metadata:\n", - " context_length: \u001b[1;36m512\u001b[0m\n", - " embedding_dimension: \u001b[1;36m4096\u001b[0m\n", - " model_id: nvidia/nv-embedqa-mistral-7b-v2\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - embedding\n", - " provider_id: nvidia\n", - " provider_model_id: nvidia/nv-embedqa-mistral-7b-v2\n", - "- metadata:\n", - " context_length: \u001b[1;36m512\u001b[0m\n", - " embedding_dimension: \u001b[1;36m1024\u001b[0m\n", - " model_id: snowflake/arctic-embed-l\n", - " model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n", - " - embedding\n", - " provider_id: nvidia\n", - " provider_model_id: snowflake/arctic-embed-l\n", - "providers:\n", - " agents:\n", - " - config:\n", - " persistence_store:\n", - " db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95magents_store.db\u001b[0m\n", - " namespace: null\n", - " type: sqlite\n", - " provider_id: meta-reference\n", - " provider_type: inline::meta-reference\n", - " datasetio:\n", - " - config:\n", - " kvstore:\n", - " db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mlocalfs_datasetio.db\u001b[0m\n", - " namespace: null\n", - " type: sqlite\n", - " provider_id: localfs\n", - " provider_type: inline::localfs\n", - " eval:\n", - " - config:\n", - " evaluator_service_url: \u001b[4;94mhttps://nmp.int.aire.nvidia.com\u001b[0m\n", - " provider_id: nvidia\n", - " provider_type: remote::nvidia\n", - " inference:\n", - " - config:\n", - " api_key: \u001b[32m'********'\u001b[0m\n", - " url: \u001b[4;94mhttps://nim.int.aire.nvidia.com\u001b[0m\n", - " provider_id: nvidia\n", - " provider_type: remote::nvidia\n", - " post_training:\n", - " - config:\n", - " api_key: \u001b[32m'********'\u001b[0m\n", - " customizer_url: \u001b[4;94mhttps://nmp.int.aire.nvidia.com\u001b[0m\n", - " dataset_namespace: default\n", - " project_id: test-project\n", - " provider_id: nvidia\n", - " provider_type: remote::nvidia\n", - " safety:\n", - " - config:\n", - " config_id: self-check\n", - " guardrails_service_url: \u001b[4;94mhttps://nmp.int.aire.nvidia.com\u001b[0m\n", - " provider_id: nvidia\n", - " provider_type: remote::nvidia\n", - " scoring:\n", - " - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " provider_id: basic\n", - " provider_type: inlin\u001b[1;92me::ba\u001b[0msic\n", - " telemetry:\n", - " - config:\n", - " service_name: \u001b[32m\"\\u200B\"\u001b[0m\n", - " sinks: sqlite\n", - " sqlite_db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mtrace_store.db\u001b[0m\n", - " provider_id: meta-reference\n", - " provider_type: inline::meta-reference\n", - " tool_runtime:\n", - " - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n", - " provider_id: rag-runtime\n", - " provider_type: inline::rag-runtime\n", - " vector_io:\n", - " - config:\n", - " kvstore:\n", - " db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mfaiss_store.db\u001b[0m\n", - " namespace: null\n", - " type: sqlite\n", - " provider_id: faiss\n", - " provider_type: inlin\u001b[1;92me::fa\u001b[0miss\n", - "scoring_fns: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "server:\n", - " auth: null\n", - " port: \u001b[1;36m8321\u001b[0m\n", - " tls_certfile: null\n", - " tls_keyfile: null\n", - "shields: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "tool_groups:\n", - "- args: null\n", - " mcp_endpoint: null\n", - " provider_id: rag-runtime\n", - " toolgroup_id: builtin::rag\n", - "vector_dbs: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "version: \u001b[32m'2'\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n", "\n", @@ -1760,7 +1220,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1792,17 +1252,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Guardrails response: I'm sorry, I can't respond to that.\n" - ] - } - ], + "outputs": [], "source": [ "# Check response contains the predefined message\n", "print(f\"Guardrails response: {response.json()['choices'][0]['message']['content']}\")\n", @@ -1811,17 +1263,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Inference response: I'm not capable of feeling emotions or taking offense to comments like that. I'm here to assist and help with your questions to the best of my abilities. Would you like to ask me something or engage in a conversation where we can learn together?\n" - ] - } - ], + "outputs": [], "source": [ "# Check inference without guardrails\n", "response = client.inference.chat_completion(\n", @@ -1854,31 +1298,9 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Upload 2 LFS files: 0%| | 0/2 [00:00 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCreated benchmark \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbenchmark_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mNameError\u001b[0m: name 'benchmark_id' is not defined" - ] - } - ], + "outputs": [], "source": [ "# Create Evaluation for model, without guardrails. First, register the benchmark.\n", "response = client.benchmarks.register(\n", @@ -1960,17 +1370,9 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created evaluation job eval-SnvLBsinjWX8RKMZYDoqzL\n" - ] - } - ], + "outputs": [], "source": [ "# Start Evaluation for model, without guardrails\n", "response = client.eval.run_eval(\n", @@ -1988,46 +1390,9 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Waiting for Evaluation job eval-SnvLBsinjWX8RKMZYDoqzL to finish.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 0.2504920959472656 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 5.505218029022217 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 10.761657953262329 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 16.03573489189148 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 21.287545919418335 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 26.547304153442383 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 31.79347014427185 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 37.04811096191406 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 42.30455207824707 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 47.56909680366516 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 52.829577922821045 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 58.09118103981018 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 63.353771924972534 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 68.60436701774597 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 73.87729597091675 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 79.12603998184204 seconds.\n", - "Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 84.3821349143982 seconds.\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[26], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Wait for the job to complete\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m job \u001b[38;5;241m=\u001b[39m \u001b[43mwait_eval_job\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbenchmark_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mguardrails_benchmark_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjob_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjob_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpolling_interval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m600\u001b[39;49m\u001b[43m)\u001b[49m\n", - "Cell \u001b[0;32mIn[25], line 33\u001b[0m, in \u001b[0;36mwait_eval_job\u001b[0;34m(benchmark_id, job_id, polling_interval, timeout)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mJob status: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mjob_status\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m after \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtime()\u001b[38;5;250m \u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;250m \u001b[39mstart_time\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m seconds.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m job_status\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;129;01min\u001b[39;00m [JobStatus\u001b[38;5;241m.\u001b[39mscheduled\u001b[38;5;241m.\u001b[39mvalue, JobStatus\u001b[38;5;241m.\u001b[39min_progress\u001b[38;5;241m.\u001b[39mvalue]:\n\u001b[0;32m---> 33\u001b[0m \u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpolling_interval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 34\u001b[0m job_status \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39meval\u001b[38;5;241m.\u001b[39mjobs\u001b[38;5;241m.\u001b[39mstatus(benchmark_id\u001b[38;5;241m=\u001b[39mbenchmark_id, job_id\u001b[38;5;241m=\u001b[39mjob_id)\n\u001b[1;32m 36\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mJob status: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mjob_status\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m after \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtime()\u001b[38;5;250m \u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;250m \u001b[39mstart_time\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m seconds.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], + "outputs": [], "source": [ "# Wait for the job to complete\n", "job = wait_eval_job(benchmark_id=guardrails_benchmark_id, job_id=job_id, polling_interval=5, timeout=600)" @@ -2035,75 +1400,18 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'job' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[27], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mJob \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mjob_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m status: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mjob\u001b[38;5;241m.\u001b[39mstatus\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mNameError\u001b[0m: name 'job' is not defined" - ] - } - ], + "outputs": [], "source": [ "print(f\"Job {job_id} status: {job.status}\")" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Job results: {\n", - " \"generations\": [],\n", - " \"scores\": {\n", - " \"test-guardrails-eval-config-1\": {\n", - " \"aggregated_results\": {\n", - " \"created_at\": \"2025-04-10T14:43:58.666499\",\n", - " \"updated_at\": \"2025-04-10T14:43:58.666500\",\n", - " \"id\": \"evaluation_result-EwyUnywQQZzNmwxf1Jqy9f\",\n", - " \"job\": \"eval-SnvLBsinjWX8RKMZYDoqzL\",\n", - " \"tasks\": {\n", - " \"qa\": {\n", - " \"metrics\": {\n", - " \"bleu\": {\n", - " \"scores\": {\n", - " \"sentence\": {\n", - " \"value\": 8.666034957135478,\n", - " \"stats\": {\n", - " \"count\": 326,\n", - " \"sum\": 2825.127396026166,\n", - " \"mean\": 8.666034957135478\n", - " }\n", - " },\n", - " \"corpus\": {\n", - " \"value\": 2.6527734361968203\n", - " }\n", - " }\n", - " }\n", - " }\n", - " }\n", - " },\n", - " \"groups\": {},\n", - " \"namespace\": \"default\",\n", - " \"custom_fields\": {}\n", - " },\n", - " \"score_rows\": []\n", - " }\n", - " }\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "job_results = client.eval.jobs.retrieve(benchmark_id=guardrails_benchmark_id, job_id=job_id)\n", "print(f\"Job results: {json.dumps(job_results.model_dump(), indent=2)}\")" @@ -2111,17 +1419,9 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created evaluation job with guardrails eval-SNRxLfTRQwnBvTxLRW6Wmp\n" - ] - } - ], + "outputs": [], "source": [ "# Start Evaluation for model, with guardrails\n", "response = client.eval.run_eval(\n", @@ -2144,20 +1444,9 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Waiting for Evaluation job eval-SNRxLfTRQwnBvTxLRW6Wmp to finish.\n", - "Job status: Job(job_id='eval-SNRxLfTRQwnBvTxLRW6Wmp', status='in_progress') after 0.253338098526001 seconds.\n", - "Job status: Job(job_id='eval-SNRxLfTRQwnBvTxLRW6Wmp', status='in_progress') after 5.548213005065918 seconds.\n", - "Job status: Job(job_id='eval-SNRxLfTRQwnBvTxLRW6Wmp', status='completed') after 10.817538976669312 seconds.\n" - ] - } - ], + "outputs": [], "source": [ "# Wait for the job to complete\n", "job = wait_eval_job(benchmark_id=guardrails_benchmark_id, job_id=job_id_with_guardrails, polling_interval=5, timeout=600)" @@ -2165,54 +1454,9 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Job results: {\n", - " \"generations\": [],\n", - " \"scores\": {\n", - " \"test-guardrails-eval-config-1\": {\n", - " \"aggregated_results\": {\n", - " \"created_at\": \"2025-04-10T14:49:52.591430\",\n", - " \"updated_at\": \"2025-04-10T14:49:52.591431\",\n", - " \"id\": \"evaluation_result-EpVEjTR8WEypqnN9iPV2cU\",\n", - " \"job\": \"eval-SNRxLfTRQwnBvTxLRW6Wmp\",\n", - " \"tasks\": {\n", - " \"qa\": {\n", - " \"metrics\": {\n", - " \"bleu\": {\n", - " \"scores\": {\n", - " \"sentence\": {\n", - " \"value\": 31.349783988926312,\n", - " \"stats\": {\n", - " \"count\": 326,\n", - " \"sum\": 10220.029580389977,\n", - " \"mean\": 31.349783988926312\n", - " }\n", - " },\n", - " \"corpus\": {\n", - " \"value\": 23.034736594171314\n", - " }\n", - " }\n", - " }\n", - " }\n", - " }\n", - " },\n", - " \"groups\": {},\n", - " \"namespace\": \"default\",\n", - " \"custom_fields\": {}\n", - " },\n", - " \"score_rows\": []\n", - " }\n", - " }\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "job_results_with_guardrails = client.eval.jobs.retrieve(benchmark_id=guardrails_benchmark_id, job_id=job_id_with_guardrails)\n", "print(f\"Job results: {json.dumps(job_results_with_guardrails.model_dump(), indent=2)}\")" @@ -2220,17 +1464,9 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bleu_score_no_guardrails: 2.6527734361968203\n" - ] - } - ], + "outputs": [], "source": [ "bleu_score_no_guardrails = job_results.scores[guardrails_benchmark_id].aggregated_results[\"tasks\"][\"qa\"][\"metrics\"][\"bleu\"][\"scores\"][\"corpus\"][\"value\"]\n", "print(f\"bleu_score_no_guardrails: {bleu_score_no_guardrails}\")" @@ -2238,17 +1474,9 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bleu_score_with_guardrails: 23.034736594171314\n" - ] - } - ], + "outputs": [], "source": [ "bleu_score_with_guardrails = job_results_with_guardrails.scores[guardrails_benchmark_id].aggregated_results[\"tasks\"][\"qa\"][\"metrics\"][\"bleu\"][\"scores\"][\"corpus\"][\"value\"]\n", "print(f\"bleu_score_with_guardrails: {bleu_score_with_guardrails}\")" @@ -2256,17 +1484,9 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "with_guardrails_bleu_score - no_guardrails_bleu_score: 20.381963157974493\n" - ] - } - ], + "outputs": [], "source": [ "# Expect the bleu score to go from 3 to 33\n", "print(f\"with_guardrails_bleu_score - no_guardrails_bleu_score: {bleu_score_with_guardrails - bleu_score_no_guardrails}\")\n",