In-progress: e2e notebook with partial Eval integration

This commit is contained in:
Jash Gulabrai 2025-04-08 14:08:01 -04:00
parent 861962fa80
commit c04ab0133d
19 changed files with 832 additions and 624 deletions


@@ -31,9 +31,9 @@
"import os\n",
"\n",
"# NVIDIA URLs\n",
"NDS_URL = \"\"\n",
"NEMO_URL = \"\"\n",
"NIM_URL = \"\"\n",
"NDS_URL = \"https://datastore.int.aire.nvidia.com\"\n",
"NEMO_URL = \"https://nmp.int.aire.nvidia.com\"\n",
"NIM_URL = \"https://nim.int.aire.nvidia.com\"\n",
"\n",
"# Inference env vars\n",
"os.environ[\"NVIDIA_BASE_URL\"] = NIM_URL\n",
@@ -51,12 +51,15 @@
"os.environ[\"NVIDIA_OUTPUT_MODEL_DIR\"] = CUSTOMIZED_MODEL_DIR\n",
"\n",
"# Guardrails env vars\n",
"os.environ[\"GUARDRAILS_SERVICE_URL\"] = NEMO_URL"
"os.environ[\"GUARDRAILS_SERVICE_URL\"] = NEMO_URL\n",
"\n",
"# Evaluator env vars\n",
"os.environ[\"NVIDIA_EVALUATOR_URL\"] = NEMO_URL\n"
]
},
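All of the service wiring in this notebook flows through the environment variables set above, so a quick check that they are populated before the client is initialized can save a confusing failure later. A minimal sketch (not part of the commit), using only the variable names set in the cell above:

```python
import os

# Sketch: confirm the provider env vars are populated before initializing the stack.
for var in ("NVIDIA_BASE_URL", "GUARDRAILS_SERVICE_URL", "NVIDIA_EVALUATOR_URL"):
    print(var, "=", os.environ.get(var, "<unset>"))
```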
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -67,14 +70,14 @@
"from time import sleep, time\n",
"from typing import Dict\n",
"\n",
"import aiohttp\n",
"import requests\n",
"from huggingface_hub import HfApi\n",
"# import aiohttp\n",
"# import requests\n",
"# from huggingface_hub import HfApi\n",
"\n",
"os.environ[\"HF_ENDPOINT\"] = f\"{NDS_URL}/v1/hf\"\n",
"os.environ[\"HF_TOKEN\"] = \"token\"\n",
"# os.environ[\"HF_ENDPOINT\"] = f\"{NDS_URL}/v1/hf\"\n",
"# os.environ[\"HF_TOKEN\"] = \"token\"\n",
"\n",
"hf_api = HfApi(endpoint=os.environ.get(\"HF_ENDPOINT\"), token=os.environ.get(\"HF_TOKEN\"))"
"# hf_api = HfApi(endpoint=os.environ.get(\"HF_ENDPOINT\"), token=os.environ.get(\"HF_TOKEN\"))"
]
},
{
@@ -87,546 +90,9 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning: `bwrap` is not available. Code interpreter tool will not work correctly.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Initializing NVIDIASafetyAdapter(https://nmp.int.aire.nvidia.com)...\n"
]
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Using config <span style=\"color: #000080; text-decoration-color: #000080\">nvidia</span>:\n",
"</pre>\n"
],
"text/plain": [
"Using config \u001b[34mnvidia\u001b[0m:\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">apis:\n",
"- agents\n",
"- datasetio\n",
"- eval\n",
"- inference\n",
"- post_training\n",
"- safety\n",
"- scoring\n",
"- telemetry\n",
"- tool_runtime\n",
"- vector_io\n",
"benchmarks: <span style=\"font-weight: bold\">[]</span>\n",
"container_image: null\n",
"datasets: <span style=\"font-weight: bold\">[]</span>\n",
"image_name: nvidia\n",
"logging: null\n",
"metadata_store:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/jgulabrai/.llama/distributions/nvidia/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">registry.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
"models:\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama3-8b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-8b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-8B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-8b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama3-70b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-70b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-70b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405B-Instruct-FP8\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-1b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-1b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-1B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-1b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3b-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11b-vision-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11b-vision-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11B-Vision-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11b-vision-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90b-vision-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90b-vision-instruct\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90B-Vision-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90b-vision-instruct\n",
"- metadata:\n",
" context_length: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8192</span>\n",
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2048</span>\n",
" model_id: nvidia/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-nv-embedqa-1b-v2\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: nvidia/llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-nv-embedqa-1b-v2\n",
"- metadata:\n",
" context_length: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">512</span>\n",
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1024</span>\n",
" model_id: nvidia/nv-embedqa-e5-v5\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: nvidia/nv-embedqa-e5-v5\n",
"- metadata:\n",
" context_length: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">512</span>\n",
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4096</span>\n",
" model_id: nvidia/nv-embedqa-mistral-7b-v2\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: nvidia/nv-embedqa-mistral-7b-v2\n",
"- metadata:\n",
" context_length: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">512</span>\n",
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1024</span>\n",
" model_id: snowflake/arctic-embed-l\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: snowflake/arctic-embed-l\n",
"providers:\n",
" agents:\n",
" - config:\n",
" persistence_store:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/jgulabrai/.llama/distributions/nvidia/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">agents_store.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" datasetio:\n",
" - config:\n",
" kvstore:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/jgulabrai/.llama/distributions/nvidia/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">localfs_datasetio.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: localfs\n",
" provider_type: inline::localfs\n",
" eval:\n",
" - config:\n",
" kvstore:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/jgulabrai/.llama/distributions/nvidia/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">meta_reference_eval.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" inference:\n",
" - config:\n",
" api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
" url: <span style=\"color: #0000ff; text-decoration-color: #0000ff; text-decoration: underline\">https://nim.int.aire.nvidia.com</span>\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" post_training:\n",
" - config:\n",
" api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
" customizer_url: <span style=\"color: #0000ff; text-decoration-color: #0000ff; text-decoration: underline\">https://nmp.int.aire.nvidia.com</span>\n",
" dataset_namespace: default\n",
" project_id: test-project\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" safety:\n",
" - config:\n",
" config_id: self-check\n",
" guardrails_service_url: <span style=\"color: #0000ff; text-decoration-color: #0000ff; text-decoration: underline\">https://nmp.int.aire.nvidia.com</span>\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" scoring:\n",
" - config: <span style=\"font-weight: bold\">{}</span>\n",
" provider_id: basic\n",
" provider_type: inlin<span style=\"color: #00ff00; text-decoration-color: #00ff00; font-weight: bold\">e::ba</span>sic\n",
" telemetry:\n",
" - config:\n",
" service_name: <span style=\"color: #008000; text-decoration-color: #008000\">\"\\u200B\"</span>\n",
" sinks: sqlite\n",
" sqlite_db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/jgulabrai/.llama/distributions/nvidia/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">trace_store.db</span>\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" tool_runtime:\n",
" - config: <span style=\"font-weight: bold\">{}</span>\n",
" provider_id: rag-runtime\n",
" provider_type: inline::rag-runtime\n",
" vector_io:\n",
" - config:\n",
" kvstore:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/jgulabrai/.llama/distributions/nvidia/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">faiss_store.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: faiss\n",
" provider_type: inlin<span style=\"color: #00ff00; text-decoration-color: #00ff00; font-weight: bold\">e::fa</span>iss\n",
"scoring_fns: <span style=\"font-weight: bold\">[]</span>\n",
"server:\n",
" auth: null\n",
" port: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8321</span>\n",
" tls_certfile: null\n",
" tls_keyfile: null\n",
"shields: <span style=\"font-weight: bold\">[]</span>\n",
"tool_groups:\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: rag-runtime\n",
" toolgroup_id: builtin::rag\n",
"vector_dbs: <span style=\"font-weight: bold\">[]</span>\n",
"version: <span style=\"color: #008000; text-decoration-color: #008000\">'2'</span>\n",
"\n",
"</pre>\n"
],
"text/plain": [
"apis:\n",
"- agents\n",
"- datasetio\n",
"- eval\n",
"- inference\n",
"- post_training\n",
"- safety\n",
"- scoring\n",
"- telemetry\n",
"- tool_runtime\n",
"- vector_io\n",
"benchmarks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"container_image: null\n",
"datasets: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"image_name: nvidia\n",
"logging: null\n",
"metadata_store:\n",
" db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mregistry.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
"models:\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama3-8b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-8b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3\u001b[0m-8B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-8b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama3-70b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-70b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3\u001b[0m-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama3-70b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-8b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-8b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-8b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-70b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-70b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-70b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-405b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-405b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-FP8\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.1\u001b[0m-405b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-1b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-1b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-1B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-1b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-3b-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-3b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-3b-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-11b-vision-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-11b-vision-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-11b-vision-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-90b-vision-instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-90b-vision-instruct\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: nvidia\n",
" provider_model_id: meta/llama-\u001b[1;36m3.2\u001b[0m-90b-vision-instruct\n",
"- metadata:\n",
" context_length: \u001b[1;36m8192\u001b[0m\n",
" embedding_dimension: \u001b[1;36m2048\u001b[0m\n",
" model_id: nvidia/llama-\u001b[1;36m3.2\u001b[0m-nv-embedqa-1b-v2\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: nvidia/llama-\u001b[1;36m3.2\u001b[0m-nv-embedqa-1b-v2\n",
"- metadata:\n",
" context_length: \u001b[1;36m512\u001b[0m\n",
" embedding_dimension: \u001b[1;36m1024\u001b[0m\n",
" model_id: nvidia/nv-embedqa-e5-v5\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: nvidia/nv-embedqa-e5-v5\n",
"- metadata:\n",
" context_length: \u001b[1;36m512\u001b[0m\n",
" embedding_dimension: \u001b[1;36m4096\u001b[0m\n",
" model_id: nvidia/nv-embedqa-mistral-7b-v2\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: nvidia/nv-embedqa-mistral-7b-v2\n",
"- metadata:\n",
" context_length: \u001b[1;36m512\u001b[0m\n",
" embedding_dimension: \u001b[1;36m1024\u001b[0m\n",
" model_id: snowflake/arctic-embed-l\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: nvidia\n",
" provider_model_id: snowflake/arctic-embed-l\n",
"providers:\n",
" agents:\n",
" - config:\n",
" persistence_store:\n",
" db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95magents_store.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" datasetio:\n",
" - config:\n",
" kvstore:\n",
" db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mlocalfs_datasetio.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: localfs\n",
" provider_type: inline::localfs\n",
" eval:\n",
" - config:\n",
" kvstore:\n",
" db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mmeta_reference_eval.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" inference:\n",
" - config:\n",
" api_key: \u001b[32m'********'\u001b[0m\n",
" url: \u001b[4;94mhttps://nim.int.aire.nvidia.com\u001b[0m\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" post_training:\n",
" - config:\n",
" api_key: \u001b[32m'********'\u001b[0m\n",
" customizer_url: \u001b[4;94mhttps://nmp.int.aire.nvidia.com\u001b[0m\n",
" dataset_namespace: default\n",
" project_id: test-project\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" safety:\n",
" - config:\n",
" config_id: self-check\n",
" guardrails_service_url: \u001b[4;94mhttps://nmp.int.aire.nvidia.com\u001b[0m\n",
" provider_id: nvidia\n",
" provider_type: remote::nvidia\n",
" scoring:\n",
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" provider_id: basic\n",
" provider_type: inlin\u001b[1;92me::ba\u001b[0msic\n",
" telemetry:\n",
" - config:\n",
" service_name: \u001b[32m\"\\u200B\"\u001b[0m\n",
" sinks: sqlite\n",
" sqlite_db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mtrace_store.db\u001b[0m\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" tool_runtime:\n",
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" provider_id: rag-runtime\n",
" provider_type: inline::rag-runtime\n",
" vector_io:\n",
" - config:\n",
" kvstore:\n",
" db_path: \u001b[35m/Users/jgulabrai/.llama/distributions/nvidia/\u001b[0m\u001b[95mfaiss_store.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: faiss\n",
" provider_type: inlin\u001b[1;92me::fa\u001b[0miss\n",
"scoring_fns: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"server:\n",
" auth: null\n",
" port: \u001b[1;36m8321\u001b[0m\n",
" tls_certfile: null\n",
" tls_keyfile: null\n",
"shields: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"tool_groups:\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: rag-runtime\n",
" toolgroup_id: builtin::rag\n",
"vector_dbs: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"version: \u001b[32m'2'\u001b[0m\n",
"\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"\n",
@@ -634,6 +100,53 @@
"client.initialize()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Helper functions for waiting on jobs\n",
"from llama_stack.apis.common.job_types import JobStatus\n",
"\n",
"def wait_customization_job(job_id: str, polling_interval: int = 10, timeout: int = 6000):\n",
" start_time = time()\n",
"\n",
" response = client.post_training.job.status(job_uuid=job_id)\n",
" job_status = response.status\n",
"\n",
" print(f\"Job status: {job_status} after {time() - start_time} seconds.\")\n",
"\n",
" while job_status in [JobStatus.scheduled, JobStatus.in_progress]:\n",
" sleep(polling_interval)\n",
" response = client.post_training.job.status(job_uuid=job_id)\n",
" job_status = response.status\n",
"\n",
" print(f\"Job status: {job_status} after {time() - start_time} seconds.\")\n",
"\n",
" if time() - start_time > timeout:\n",
" raise RuntimeError(f\"Customization Job {job_id} took more than {timeout} seconds.\")\n",
" \n",
" return job_status\n",
"\n",
"def wait_eval_job(benchmark_id: str, job_id: str, polling_interval: int = 10, timeout: int = 6000):\n",
" start_time = time()\n",
" job_status = client.eval.jobs.status(benchmark_id=benchmark_id, job_id=job_id)\n",
"\n",
" print(f\"Job status: {job_status} after {time() - start_time} seconds.\")\n",
"\n",
" while job_status in [JobStatus.scheduled, JobStatus.in_progress]:\n",
" sleep(polling_interval)\n",
" job_status = client.eval.jobs.status(benchmark_id=benchmark_id, job_id=job_id)\n",
"\n",
" print(f\"Job status: {job_status} after {time() - start_time} seconds.\")\n",
"\n",
" if time() - start_time > timeout:\n",
" raise RuntimeError(f\"Evaluation Job {job_id} took more than {timeout} seconds.\")\n",
"\n",
" return job_status\n"
]
},
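The two polling helpers in this new cell differ only in how the job status is fetched. A minimal consolidation sketch (not part of the commit; `wait_for_job` is a hypothetical name), assuming both status lookups resolve to a `JobStatus` as above:

```python
from time import sleep, time
from typing import Callable

from llama_stack.apis.common.job_types import JobStatus


def wait_for_job(get_status: Callable[[], JobStatus], job_id: str,
                 polling_interval: int = 10, timeout: int = 6000) -> JobStatus:
    """Poll get_status() until the job leaves scheduled/in_progress, or time out."""
    start_time = time()
    job_status = get_status()
    print(f"Job status: {job_status} after {time() - start_time} seconds.")
    while job_status in [JobStatus.scheduled, JobStatus.in_progress]:
        if time() - start_time > timeout:
            raise RuntimeError(f"Job {job_id} took more than {timeout} seconds.")
        sleep(polling_interval)
        job_status = get_status()
        print(f"Job status: {job_status} after {time() - start_time} seconds.")
    return job_status


# Usage, mirroring the notebook's two helpers:
# wait_for_job(lambda: client.post_training.job.status(job_uuid=job_id).status, job_id)
# wait_for_job(lambda: client.eval.jobs.status(benchmark_id=benchmark_id, job_id=job_id), job_id)
```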
{
"cell_type": "markdown",
"metadata": {},
@@ -643,11 +156,11 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"sample_squad_test_dataset_name = \"jg-llama-stack\"\n",
"sample_squad_test_dataset_name = \"jg-llama-stack-sample-squad-data\"\n",
"namespace = \"default\"\n",
"repo_id = f\"{namespace}/{sample_squad_test_dataset_name}\""
]
@@ -767,12 +280,160 @@
"TODO: Implement this section after Evalutor integration is done."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"benchmark_id = \"jg-llama-stack-3\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"# Register a benchmark, which creates an Evaluation Config\n",
"simple_eval_config = {\n",
" \"benchmark_id\": benchmark_id,\n",
" \"dataset_id\": \"\",\n",
" \"scoring_functions\": [],\n",
" \"metadata\": {\n",
" \"type\": \"custom\",\n",
" \"params\": {\n",
" \"parallelism\": 8\n",
" },\n",
" \"tasks\": {\n",
" \"qa\": {\n",
" \"type\": \"completion\",\n",
" \"params\": {\n",
" \"template\": {\n",
" \"prompt\": \"{{prompt}}\",\n",
" \"max_tokens\": 200\n",
" }\n",
" },\n",
" \"dataset\": {\n",
" \"files_url\": f\"hf://datasets/{repo_id}/testing/testing.jsonl\"\n",
" },\n",
" \"metrics\": {\n",
" \"bleu\": {\n",
" \"type\": \"bleu\",\n",
" \"params\": {\n",
" \"references\": [\n",
" \"{{ideal_response}}\"\n",
" ]\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
"}\n",
"\n",
"response = client.benchmarks.register(\n",
" benchmark_id=benchmark_id,\n",
" dataset_id=repo_id,\n",
" scoring_functions=simple_eval_config[\"scoring_functions\"],\n",
" metadata=simple_eval_config[\"metadata\"]\n",
")\n",
"print(f\"Created benchmark {benchmark_id}\")"
]
},
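Note that `simple_eval_config["dataset_id"]` is left empty, so the data location the Evaluator actually uses is the `files_url` nested inside `metadata`. A quick sanity check (a sketch, not part of this commit) can guard against the two drifting apart:

```python
# Sketch: the benchmark's data location lives in the nested metadata,
# not in the empty top-level dataset_id, so verify it matches repo_id.
files_url = simple_eval_config["metadata"]["tasks"]["qa"]["dataset"]["files_url"]
assert repo_id in files_url, f"benchmark points at {files_url}, expected dataset {repo_id}"
```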
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for benchmark in client.benchmarks.list():\n",
" print(benchmark)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Launch a simple evaluation with the benchmark\n",
"response = client.eval.run_eval(\n",
" benchmark_id=benchmark_id,\n",
" benchmark_config={\n",
" \"eval_candidate\": {\n",
" \"type\": \"model\",\n",
" \"model\": \"meta/llama-3.1-8b-instruct\",\n",
" \"sampling_params\": {\n",
" \"strategy\": {\n",
" \"type\": \"top_p\",\n",
" \"temperature\": 1.0,\n",
" \"top_p\": 0.95,\n",
" },\n",
" \"max_tokens\": 4096,\n",
" \"repeat_penalty\": 1.0,\n",
" },\n",
" }\n",
" }\n",
")\n",
"job_id = response.model_dump()[\"job_id\"]\n",
"print(f\"Created evaluation job {job_id}\")"
]
},
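Since `run_eval` returns a pydantic job object here, the `model_dump()` round-trip above is optional; attribute access should work too (a sketch, assuming the returned `Job` schema exposes `job_id` directly):

```python
# Equivalent, without serializing the whole response (assumes Job.job_id exists):
job_id = response.job_id
print(f"Created evaluation job {job_id}")
```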
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Wait for the job to complete\n",
"job = wait_eval_job(benchmark_id=benchmark_id, job_id=job_id, polling_interval=5, timeout=600)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(f\"Job {job_id} status: {job.status}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"job_results = client.eval.jobs.retrieve(benchmark_id=benchmark_id, job_id=job_id)\n",
"print(f\"Job results: {job_results.model_dump()}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Extract bleu score and assert it's within range\n",
"initial_bleu_score = job_results.scores[benchmark_id].aggregated_results[\"tasks\"][\"qa\"][\"metrics\"][\"bleu\"][\"scores\"][\"sentence\"][\"value\"]\n",
"print(f\"Initial bleu score: {initial_bleu_score}\")\n",
"\n",
"assert initial_bleu_score >= 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Extract accuracy and assert it's within range\n",
"initial_accuracy_score = job_results.scores[benchmark_id].aggregated_results[\"tasks\"][\"qa\"][\"metrics\"][\"bleu\"][\"scores\"][\"corpus\"][\"value\"]\n",
"print(f\"Initial accuracy: {initial_accuracy_score}\")\n",
"\n",
"assert initial_accuracy_score >= 0.5"
]
},
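The deep indexing in the two cells above raises a bare `KeyError` if the Evaluator ever changes its result layout. A defensive lookup (a sketch, assuming the `aggregated_results` shape used in this notebook) degrades to `None` instead:

```python
from typing import Any, Optional


def extract_metric_value(aggregated_results: dict, task: str, metric: str, level: str) -> Optional[Any]:
    """Walk tasks/<task>/metrics/<metric>/scores/<level>/value, returning None if any key is missing."""
    node: Any = aggregated_results
    for key in ("tasks", task, "metrics", metric, "scores", level, "value"):
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node


# Usage (sketch):
# agg = job_results.scores[benchmark_id].aggregated_results
# sentence_bleu = extract_metric_value(agg, "qa", "bleu", "sentence")
# corpus_bleu = extract_metric_value(agg, "qa", "bleu", "corpus")
```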
{
"cell_type": "markdown",
@@ -827,11 +488,17 @@
"metadata": {},
"outputs": [],
"source": [
"# Customized model isn't available in the list of models, so this check doesn't work.\n",
"# customized_model = client.models.retrieve(f\"{NAMESPACE}/{CUSTOMIZED_MODEL_DIR}\")\n",
"# assert customized_model is not None\n",
"job_status = client.post_training.job.status(job_uuid=job_id)\n",
"print(f\"Job status: {job_status.status}\")"
"# Wait for the job to complete\n",
"job_status = wait_customization_job(job_id=job_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(f\"Job {job_id} status: {job_status}\")"
]
},
{
@@ -840,10 +507,7 @@
"metadata": {},
"outputs": [],
"source": [
"# TODO: This doesn't work - errors with model_id not found.\n",
"# Depends on https://github.com/meta-llama/llama-stack/pull/1859\n",
"# Verify that inference with the new model works\n",
"\n",
"from llama_stack.apis.models.models import ModelType\n",
"\n",
"# TODO: Uncomment after https://github.com/meta-llama/llama-stack/pull/1859 is merged\n",
@@ -853,14 +517,15 @@
"# provider_id=\"nvidia\",\n",
"# )\n",
"\n",
"response = client.inference.completion(\n",
" content=\"Complete the sentence using one word: Roses are red, violets are \",\n",
" stream=False,\n",
" model_id=f\"default/{CUSTOMIZED_MODEL_DIR}\",\n",
" sampling_params={\n",
" \"max_tokens\": 50,\n",
" },\n",
")"
"# TODO: This won't work until the code above works - errors with model_id not found.\n",
"# response = client.inference.completion(\n",
"# content=\"Complete the sentence using one word: Roses are red, violets are \",\n",
"# stream=False,\n",
"# model_id=f\"default/{CUSTOMIZED_MODEL_DIR}\",\n",
"# sampling_params={\n",
"# \"max_tokens\": 50,\n",
"# },\n",
"# )"
]
},
{
@@ -868,7 +533,7 @@
"metadata": {},
"source": [
"## TODO: Evaluate Customized Model\n",
"Implement this section after Evalutor integration is done."
"Implement this section after Evalutor integration is done, and we can register Customized model in Model Registry."
]
},
{
@@ -1078,39 +743,18 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Shield(identifier='self-check', provider_id='nvidia', provider_resource_id='self-check', type='shield', params={}, access_attributes=None)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"client.shields.register(shield_id=shield_id, provider_id=\"nvidia\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'model': 'self-check', 'messages': [{'role': 'system', 'content': 'You are stupid.'}], 'temperature': 1.0, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'max_tokens': 160, 'stream': False, 'guardrails': {'config_id': 'self-check'}}\n",
"{'status': 'success', 'rails_status': {'self check input': {'status': 'success'}}, 'guardrails_data': {'log': {'activated_rails': [], 'stats': {'total_duration': 0.0007240772247314453, 'llm_calls_duration': 0.0, 'llm_calls_count': 0, 'llm_calls_total_prompt_tokens': 0, 'llm_calls_total_completion_tokens': 0, 'llm_calls_total_tokens': 0}}}}\n",
"Safety response: RunShieldResponse(violation=None)\n"
]
}
],
"outputs": [],
"source": [
"# Check inference with guardrails\n",
"# TODO: For some reason, `role: \"user\"` returns a 422 error.\n",
@@ -1154,7 +798,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.2"
"version": "3.11.10"
}
},
"nbformat": 4,