Clear notebook output

Jash Gulabrai 2025-04-10 10:58:11 -04:00
parent a671b33589
commit 7faec2380a
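
One plausible way to reproduce this cleanup with the nbformat package. This is a minimal sketch, not necessarily how this commit was produced, and "notebook.ipynb" is a placeholder path:

    # Clear every code cell the way this diff does:
    # execution_count -> null, outputs -> [].
    import nbformat

    nb = nbformat.read("notebook.ipynb", as_version=4)
    for cell in nb.cells:
        if cell.cell_type == "code":
            cell.execution_count = None  # serialized as "execution_count": null
            cell.outputs = []            # serialized as "outputs": []
    nbformat.write(nb, "notebook.ipynb")

The same effect should also be available from the command line with jupyter nbconvert --clear-output --inplace notebook.ipynb.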

@@ -115,18 +115,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/jgulabrai/Projects/forks/llama-stack/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
-   ],
+   "outputs": [],
    "source": [
     "import asyncio\n",
     "import json\n",
@@ -155,540 +146,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning: `bwrap` is not available. Code interpreter tool will not work correctly.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Initializing NVIDIASafetyAdapter(https://nmp.int.aire.nvidia.com)...\n"
]
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Using config <span style=\"color: #000080; text-decoration-color: #000080\">nvidia</span>:\n",
"</pre>\n"
],
"text/plain": [
"Using config \u001b[34mnvidia\u001b[0m:\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">apis:\n",
"- agents\n",
"- datasetio\n",
"- eval\n",
"- inference\n",
"- post_training\n",
"- safety\n",
"- scoring\n",
"- telemetry\n",
"- tool_runtime\n",
"- vector_io\n",
"benchmarks: <span style=\"font-weight: bold\">[]</span>\n",
"container_image: null\n",
"datasets: <span style=\"font-weight: bold\">[]</span>\n",
"image_name: nvidia\n",
"logging: null\n",
"metadata_store:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/jgulabrai/.llama/distributions/nvidia/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">registry.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
"models:\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama3-8b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-8b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-8B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-8b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama3-70b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-70b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-70b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405B-Instruct-FP8\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-1b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-1b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-1B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-1b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11b-vision-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11b-vision-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11B-Vision-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11b-vision-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90b-vision-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90b-vision-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90B-Vision-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90b-vision-instruct\n",
"- metadata:\n",
" context_length: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8192</span>\n",
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2048</span>\n",
" model_id: nvidia/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-nv-embedqa-1b-v2\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: nvidia/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-nv-embedqa-1b-v2\n",
"- metadata:\n",
" context_length: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">512</span>\n",
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1024</span>\n",
" model_id: nvidia/nv-embedqa-e5-v5\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: nvidia/nv-embedqa-e5-v5\n",
"- metadata:\n",
" context_length: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">512</span>\n",
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4096</span>\n",
" model_id: nvidia/nv-embedqa-mistral-7b-v2\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: nvidia/nv-embedqa-mistral-7b-v2\n",
"- metadata:\n",
" context_length: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">512</span>\n",
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1024</span>\n",
" model_id: snowflake/arctic-embed-l\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: snowflake/arctic-embed-l\n",
"providers:\n",
" agents:\n",
" - config:\n",
" persistence_store:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/jgulabrai/.llama/distributions/nvidia/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">agents_store.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" datasetio:\n",
" - config:\n",
" kvstore:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/jgulabrai/.llama/distributions/nvidia/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">localfs_datasetio.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: localfs\n",
" provider_type: inline::localfs\n",
" eval:\n",
" - config:\n",
" evaluator_service_url: <span style=\"color: #0000ff; text-decoration-color: #0000ff; text-decoration: underline\">https://nmp.int.aire.nvidia.com</span>\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" inference:\n",
" - config:\n",
" api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
" url: <span style=\"color: #0000ff; text-decoration-color: #0000ff; text-decoration: underline\">https://nim.int.aire.nvidia.com</span>\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" post_training:\n",
" - config:\n",
" api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
" customizer_url: <span style=\"color: #0000ff; text-decoration-color: #0000ff; text-decoration: underline\">https://nmp.int.aire.nvidia.com</span>\n",
" dataset_namespace: default\n",
" project_id: test-project\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" safety:\n",
" - config:\n",
" config_id: self-check\n",
" guardrails_service_url: <span style=\"color: #0000ff; text-decoration-color: #0000ff; text-decoration: underline\">https://nmp.int.aire.nvidia.com</span>\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" scoring:\n",
" - config: <span style=\"font-weight: bold\">{}</span>\n",
" provider_id: basic\n",
" provider_type: inlin<span style=\"color: #00ff00; text-decoration-color: #00ff00; font-weight: bold\">e::ba</span>sic\n",
" telemetry:\n",
" - config:\n",
" service_name: <span style=\"color: #008000; text-decoration-color: #008000\">\"\\u200B\"</span>\n",
" sinks: sqlite\n",
" sqlite_db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/jgulabrai/.llama/distributions/nvidia/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">trace_store.db</span>\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" tool_runtime:\n",
" - config: <span style=\"font-weight: bold\">{}</span>\n",
" provider_id: rag-runtime\n",
" provider_type: inline::rag-runtime\n",
" vector_io:\n",
" - config:\n",
" kvstore:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/jgulabrai/.llama/distributions/nvidia/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">faiss_store.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: faiss\n",
" provider_type: inlin<span style=\"color: #00ff00; text-decoration-color: #00ff00; font-weight: bold\">e::fa</span>iss\n",
"scoring_fns: <span style=\"font-weight: bold\">[]</span>\n",
"server:\n",
" auth: null\n",
" port: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8321</span>\n",
" tls_certfile: null\n",
" tls_keyfile: null\n",
"shields: <span style=\"font-weight: bold\">[]</span>\n",
"tool_groups:\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: rag-runtime\n",
" toolgroup_id: builtin::rag\n",
"vector_dbs: <span style=\"font-weight: bold\">[]</span>\n",
"version: <span style=\"color: #008000; text-decoration-color: #008000\">'2'</span>\n",
"\n",
"</pre>\n"
],
"text/plain": [
"apis:\n",
"- agents\n",
"- datasetio\n",
"- eval\n",
"- inference\n",
"- post_training\n",
"- safety\n",
"- scoring\n",
"- telemetry\n",
"- tool_runtime\n",
"- vector_io\n",
"benchmarks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"container_image: null\n",
"datasets: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"image_name: nvidia\n",
"logging: null\n",
"metadata_store:\n",
" db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mregistry.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
"models:\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama3-8b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-8b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3\u001b[0m-8B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-8b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama3-70b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-70b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3\u001b[0m-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-70b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-8b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-8b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-8b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-70b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-70b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-70b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-405b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-405b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-FP8\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-405b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-1b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-1b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-1B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-1b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-3b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-3b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-3b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-11b-vision-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-11b-vision-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-11b-vision-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-90b-vision-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-90b-vision-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-90b-vision-instruct\n",
"- metadata:\n",
" context_length: \u001b[1;36m8192\u001b[0m\n",
" embedding_dimension: \u001b[1;36m2048\u001b[0m\n",
" model_id: nvidia/llama-\u001b[1;36m3.2\u001b[0m-nv-embedqa-1b-v2\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: nvidia/llama-\u001b[1;36m3.2\u001b[0m-nv-embedqa-1b-v2\n",
"- metadata:\n",
" context_length: \u001b[1;36m512\u001b[0m\n",
" embedding_dimension: \u001b[1;36m1024\u001b[0m\n",
" model_id: nvidia/nv-embedqa-e5-v5\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: nvidia/nv-embedqa-e5-v5\n",
"- metadata:\n",
" context_length: \u001b[1;36m512\u001b[0m\n",
" embedding_dimension: \u001b[1;36m4096\u001b[0m\n",
" model_id: nvidia/nv-embedqa-mistral-7b-v2\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: nvidia/nv-embedqa-mistral-7b-v2\n",
"- metadata:\n",
" context_length: \u001b[1;36m512\u001b[0m\n",
" embedding_dimension: \u001b[1;36m1024\u001b[0m\n",
" model_id: snowflake/arctic-embed-l\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: snowflake/arctic-embed-l\n",
"providers:\n",
" agents:\n",
" - config:\n",
" persistence_store:\n",
" db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95magents_store.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" datasetio:\n",
" - config:\n",
" kvstore:\n",
" db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mlocalfs_datasetio.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: localfs\n",
" provider_type: inline::localfs\n",
" eval:\n",
" - config:\n",
" evaluator_service_url: \u001b[4;94mhttps://nmp.int.aire.nvidia.com\u001b[0m\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" inference:\n",
" - config:\n",
" api_key: \u001b[32m'********'\u001b[0m\n",
" url: \u001b[4;94mhttps://nim.int.aire.nvidia.com\u001b[0m\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" post_training:\n",
" - config:\n",
" api_key: \u001b[32m'********'\u001b[0m\n",
" customizer_url: \u001b[4;94mhttps://nmp.int.aire.nvidia.com\u001b[0m\n",
" dataset_namespace: default\n",
" project_id: test-project\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" safety:\n",
" - config:\n",
" config_id: self-check\n",
" guardrails_service_url: \u001b[4;94mhttps://nmp.int.aire.nvidia.com\u001b[0m\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" scoring:\n",
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" provider_id: basic\n",
" provider_type: inlin\u001b[1;92me::ba\u001b[0msic\n",
" telemetry:\n",
" - config:\n",
" service_name: \u001b[32m\"\\u200B\"\u001b[0m\n",
" sinks: sqlite\n",
" sqlite_db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mtrace_store.db\u001b[0m\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" tool_runtime:\n",
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" provider_id: rag-runtime\n",
" provider_type: inline::rag-runtime\n",
" vector_io:\n",
" - config:\n",
" kvstore:\n",
" db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mfaiss_store.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: faiss\n",
" provider_type: inlin\u001b[1;92me::fa\u001b[0miss\n",
"scoring_fns: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"server:\n",
" auth: null\n",
" port: \u001b[1;36m8321\u001b[0m\n",
" tls_certfile: null\n",
" tls_keyfile: null\n",
"shields: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"tool_groups:\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: rag-runtime\n",
" toolgroup_id: builtin::rag\n",
"vector_dbs: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"version: \u001b[32m'2'\u001b[0m\n",
"\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
-   ],
+   "outputs": [],
    "source": [
     "from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
     "\n",
@@ -1760,7 +1220,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1792,17 +1252,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Guardrails response: I'm sorry, I can't respond to that.\n"
]
}
-   ],
+   "outputs": [],
    "source": [
     "# Check response contains the predefined message\n",
     "print(f\"Guardrails response: {response.json()['choices'][0]['message']['content']}\")\n",
@@ -1811,17 +1263,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Inference response: I'm not capable of feeling emotions or taking offense to comments like that. I'm here to assist and help with your questions to the best of my abilities. Would you like to ask me something or engage in a conversation where we can learn together?\n"
]
}
-   ],
+   "outputs": [],
    "source": [
     "# Check inference without guardrails\n",
     "response = client.inference.chat_completion(\n",
@@ -1854,31 +1298,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Upload 2 LFS files: 0%| | 0/2 [00:00<?, ?it/s]\n",
"\u001b[A\n",
"content_safety_input_50.jsonl: 100%|██████████| 44.9k/44.9k [00:00<00:00, 121kB/s] \n",
"content_safety_input.jsonl: 100%|██████████| 126k/126k [00:00<00:00, 277kB/s] \n",
"Upload 2 LFS files: 100%|██████████| 2/2 [00:00<00:00, 3.75it/s]\n"
]
},
{
"data": {
"text/plain": [
"CommitInfo(commit_url='', commit_message='Upload folder using huggingface_hub', commit_description='', oid='fbd209c224916aa76af32cbab627df79b665e73d', pr_url=None, repo_url=RepoUrl('', endpoint='https://huggingface.co', repo_type='model', repo_id=''), pr_revision=None, pr_num=None)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
-   ],
+   "outputs": [],
    "source": [
     "# Create dataset and upload test data\n",
     "hf_api.create_repo(guardrails_repo_id, repo_type=\"dataset\")\n",
@@ -1934,19 +1356,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"ename": "NameError",
"evalue": "name 'benchmark_id' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[22], line 8\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Create Evaluation for model, without guardrails. First, register the benchmark.\u001b[39;00m\n\u001b[1;32m 2\u001b[0m response \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39mbenchmarks\u001b[38;5;241m.\u001b[39mregister(\n\u001b[1;32m 3\u001b[0m benchmark_id\u001b[38;5;241m=\u001b[39mguardrails_benchmark_id,\n\u001b[1;32m 4\u001b[0m dataset_id\u001b[38;5;241m=\u001b[39mguardrails_repo_id,\n\u001b[1;32m 5\u001b[0m scoring_functions\u001b[38;5;241m=\u001b[39mguardrails_eval_config[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscoring_functions\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 6\u001b[0m metadata\u001b[38;5;241m=\u001b[39mguardrails_eval_config[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmetadata\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 7\u001b[0m )\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCreated benchmark \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbenchmark_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[0;31mNameError\u001b[0m: name 'benchmark_id' is not defined"
]
}
-   ],
+   "outputs": [],
    "source": [
     "# Create Evaluation for model, without guardrails. First, register the benchmark.\n",
     "response = client.benchmarks.register(\n",
@@ -1960,17 +1370,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Created evaluation job eval-SnvLBsinjWX8RKMZYDoqzL\n"
]
}
-   ],
+   "outputs": [],
    "source": [
     "# Start Evaluation for model, without guardrails\n",
     "response = client.eval.run_eval(\n",
@@ -1988,46 +1390,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Waiting for Evaluation job eval-SnvLBsinjWX8RKMZYDoqzL to finish.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 0.2504920959472656 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 5.505218029022217 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 10.761657953262329 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 16.03573489189148 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 21.287545919418335 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 26.547304153442383 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 31.79347014427185 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 37.04811096191406 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 42.30455207824707 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 47.56909680366516 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 52.829577922821045 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 58.09118103981018 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 63.353771924972534 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 68.60436701774597 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 73.87729597091675 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 79.12603998184204 seconds.\n",
"Job status: Job(job_id='eval-SnvLBsinjWX8RKMZYDoqzL', status='in_progress') after 84.3821349143982 seconds.\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[26], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Wait for the job to complete\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m job \u001b[38;5;241m=\u001b[39m \u001b[43mwait_eval_job\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbenchmark_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mguardrails_benchmark_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjob_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjob_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpolling_interval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m600\u001b[39;49m\u001b[43m)\u001b[49m\n",
"Cell \u001b[0;32mIn[25], line 33\u001b[0m, in \u001b[0;36mwait_eval_job\u001b[0;34m(benchmark_id, job_id, polling_interval, timeout)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mJob status: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mjob_status\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m after \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtime()\u001b[38;5;250m \u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;250m \u001b[39mstart_time\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m seconds.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m job_status\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;129;01min\u001b[39;00m [JobStatus\u001b[38;5;241m.\u001b[39mscheduled\u001b[38;5;241m.\u001b[39mvalue, JobStatus\u001b[38;5;241m.\u001b[39min_progress\u001b[38;5;241m.\u001b[39mvalue]:\n\u001b[0;32m---> 33\u001b[0m \u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpolling_interval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 34\u001b[0m job_status \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39meval\u001b[38;5;241m.\u001b[39mjobs\u001b[38;5;241m.\u001b[39mstatus(benchmark_id\u001b[38;5;241m=\u001b[39mbenchmark_id, job_id\u001b[38;5;241m=\u001b[39mjob_id)\n\u001b[1;32m 36\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mJob status: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mjob_status\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m after \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtime()\u001b[38;5;250m \u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;250m \u001b[39mstart_time\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m seconds.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
-   ],
+   "outputs": [],
    "source": [
     "# Wait for the job to complete\n",
     "job = wait_eval_job(benchmark_id=guardrails_benchmark_id, job_id=job_id, polling_interval=5, timeout=600)"
@@ -2035,75 +1400,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"ename": "NameError",
"evalue": "name 'job' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[27], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mJob \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mjob_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m status: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mjob\u001b[38;5;241m.\u001b[39mstatus\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[0;31mNameError\u001b[0m: name 'job' is not defined"
]
}
-   ],
+   "outputs": [],
    "source": [
     "print(f\"Job {job_id} status: {job.status}\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Job results: {\n",
" \"generations\": [],\n",
" \"scores\": {\n",
" \"test-guardrails-eval-config-1\": {\n",
" \"aggregated_results\": {\n",
" \"created_at\": \"2025-04-10T14:43:58.666499\",\n",
" \"updated_at\": \"2025-04-10T14:43:58.666500\",\n",
" \"id\": \"evaluation_result-EwyUnywQQZzNmwxf1Jqy9f\",\n",
" \"job\": \"eval-SnvLBsinjWX8RKMZYDoqzL\",\n",
" \"tasks\": {\n",
" \"qa\": {\n",
" \"metrics\": {\n",
" \"bleu\": {\n",
" \"scores\": {\n",
" \"sentence\": {\n",
" \"value\": 8.666034957135478,\n",
" \"stats\": {\n",
" \"count\": 326,\n",
" \"sum\": 2825.127396026166,\n",
" \"mean\": 8.666034957135478\n",
" }\n",
" },\n",
" \"corpus\": {\n",
" \"value\": 2.6527734361968203\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
" },\n",
" \"groups\": {},\n",
" \"namespace\": \"default\",\n",
" \"custom_fields\": {}\n",
" },\n",
" \"score_rows\": []\n",
" }\n",
" }\n",
"}\n"
]
}
-   ],
+   "outputs": [],
    "source": [
     "job_results = client.eval.jobs.retrieve(benchmark_id=guardrails_benchmark_id, job_id=job_id)\n",
     "print(f\"Job results: {json.dumps(job_results.model_dump(), indent=2)}\")"
@@ -2111,17 +1419,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Created evaluation job with guardrails eval-SNRxLfTRQwnBvTxLRW6Wmp\n"
]
}
-   ],
+   "outputs": [],
    "source": [
     "# Start Evaluation for model, with guardrails\n",
     "response = client.eval.run_eval(\n",
@@ -2144,20 +1444,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Waiting for Evaluation job eval-SNRxLfTRQwnBvTxLRW6Wmp to finish.\n",
"Job status: Job(job_id='eval-SNRxLfTRQwnBvTxLRW6Wmp', status='in_progress') after 0.253338098526001 seconds.\n",
"Job status: Job(job_id='eval-SNRxLfTRQwnBvTxLRW6Wmp', status='in_progress') after 5.548213005065918 seconds.\n",
"Job status: Job(job_id='eval-SNRxLfTRQwnBvTxLRW6Wmp', status='completed') after 10.817538976669312 seconds.\n"
]
}
-   ],
+   "outputs": [],
    "source": [
     "# Wait for the job to complete\n",
     "job = wait_eval_job(benchmark_id=guardrails_benchmark_id, job_id=job_id_with_guardrails, polling_interval=5, timeout=600)"
@@ -2165,54 +1454,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Job results: {\n",
" \"generations\": [],\n",
" \"scores\": {\n",
" \"test-guardrails-eval-config-1\": {\n",
" \"aggregated_results\": {\n",
" \"created_at\": \"2025-04-10T14:49:52.591430\",\n",
" \"updated_at\": \"2025-04-10T14:49:52.591431\",\n",
" \"id\": \"evaluation_result-EpVEjTR8WEypqnN9iPV2cU\",\n",
" \"job\": \"eval-SNRxLfTRQwnBvTxLRW6Wmp\",\n",
" \"tasks\": {\n",
" \"qa\": {\n",
" \"metrics\": {\n",
" \"bleu\": {\n",
" \"scores\": {\n",
" \"sentence\": {\n",
" \"value\": 31.349783988926312,\n",
" \"stats\": {\n",
" \"count\": 326,\n",
" \"sum\": 10220.029580389977,\n",
" \"mean\": 31.349783988926312\n",
" }\n",
" },\n",
" \"corpus\": {\n",
" \"value\": 23.034736594171314\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
" },\n",
" \"groups\": {},\n",
" \"namespace\": \"default\",\n",
" \"custom_fields\": {}\n",
" },\n",
" \"score_rows\": []\n",
" }\n",
" }\n",
"}\n"
]
}
-   ],
+   "outputs": [],
    "source": [
     "job_results_with_guardrails = client.eval.jobs.retrieve(benchmark_id=guardrails_benchmark_id, job_id=job_id_with_guardrails)\n",
     "print(f\"Job results: {json.dumps(job_results_with_guardrails.model_dump(), indent=2)}\")"
@@ -2220,17 +1464,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"bleu_score_no_guardrails: 2.6527734361968203\n"
]
}
-   ],
+   "outputs": [],
    "source": [
     "bleu_score_no_guardrails = job_results.scores[guardrails_benchmark_id].aggregated_results[\"tasks\"][\"qa\"][\"metrics\"][\"bleu\"][\"scores\"][\"corpus\"][\"value\"]\n",
     "print(f\"bleu_score_no_guardrails: {bleu_score_no_guardrails}\")"
@@ -2238,17 +1474,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"bleu_score_with_guardrails: 23.034736594171314\n"
]
}
-   ],
+   "outputs": [],
    "source": [
     "bleu_score_with_guardrails = job_results_with_guardrails.scores[guardrails_benchmark_id].aggregated_results[\"tasks\"][\"qa\"][\"metrics\"][\"bleu\"][\"scores\"][\"corpus\"][\"value\"]\n",
     "print(f\"bleu_score_with_guardrails: {bleu_score_with_guardrails}\")"
@@ -2256,17 +1484,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"with_guardrails_bleu_score - no_guardrails_bleu_score: 20.381963157974493\n"
]
}
-   ],
+   "outputs": [],
    "source": [
     "# Expect the bleu score to go from 3 to 33\n",
     "print(f\"with_guardrails_bleu_score - no_guardrails_bleu_score: {bleu_score_with_guardrails - bleu_score_no_guardrails}\")\n",