Merge branch 'main' into patch-1

commit f5edd07b29
Author: Yuan Tang (committed by GitHub)
Date: 2025-01-17 23:49:05 -05:00
91 changed files with 995 additions and 632 deletions

@@ -239,5 +239,6 @@ jobs:
pip install pytest nbval
llama stack build --template together --image-type venv
pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Building_AI_Applications.ipynb
pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
# TODO: add trigger for integration test workflow & docker builds
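
Note: `--nbval-lax` runs every notebook cell but only validates outputs of cells explicitly marked for checking; a cell can opt out of execution entirely with nbval's `# NBVAL_SKIP` marker, which the notebook diff further down adds to its install and build cells. A minimal sketch of such a cell, rewritten without the IPython `!` magic so it is plain, runnable Python (the subprocess form is an assumption, not from the diff):

    # NBVAL_SKIP  (nbval will not execute this cell under `pytest --nbval-lax`)
    import subprocess
    import sys

    # Equivalent of the notebook's `!pip install -U llama-stack` shell magic.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "llama-stack"])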

@@ -1,6 +1,6 @@
version: '2'
image_name: local
docker_image: null
container_image: null
conda_env: local
apis:
- shields
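
Note: the `docker_image` -> `container_image` rename in this hunk recurs verbatim in the other run configs touched by this commit (identical hunks appear below). A sketch of a loader that tolerates both spellings during migration; the helper itself is hypothetical, only the two key names come from the diff, and PyYAML is assumed:

    import yaml

    def load_run_config(path: str) -> dict:
        # Hypothetical migration shim: accept configs written before and
        # after the docker_image -> container_image rename.
        with open(path) as f:
            cfg = yaml.safe_load(f)
        if "container_image" not in cfg and "docker_image" in cfg:
            cfg["container_image"] = cfg.pop("docker_image")
        return cfg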

@@ -13,6 +13,7 @@
"httpx",
"huggingface_hub",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
@@ -45,6 +46,7 @@
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
@@ -78,6 +80,7 @@
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
@@ -101,14 +104,17 @@
],
"remote-vllm": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
@@ -142,6 +148,7 @@
"fireworks-ai",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
@@ -176,6 +183,7 @@
"httpx",
"huggingface_hub",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
@@ -209,6 +217,7 @@
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
@@ -244,6 +253,7 @@
"httpx",
"lm-format-enforcer",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
@@ -279,6 +289,7 @@
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
@@ -315,6 +326,7 @@
"httpx",
"lm-format-enforcer",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
@@ -421,6 +433,7 @@
"httpx",
"huggingface_hub",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",

@@ -1,6 +1,6 @@
version: '2'
image_name: local
docker_image: null
container_image: null
conda_env: local
apis:
- shields

@@ -1,6 +1,6 @@
version: '2'
image_name: local
docker_image: null
container_image: null
conda_env: local
apis:
- shields

@@ -481,7 +481,7 @@
"- telemetry\n",
"conda_env: together\n",
"datasets: <span style=\"font-weight: bold\">[]</span>\n",
"docker_image: null\n",
"container_image: null\n",
"eval_tasks: <span style=\"font-weight: bold\">[]</span>\n",
"image_name: together\n",
"memory_banks: <span style=\"font-weight: bold\">[]</span>\n",
@@ -600,7 +600,7 @@
"- telemetry\n",
"conda_env: together\n",
"datasets: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"docker_image: null\n",
"container_image: null\n",
"eval_tasks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"image_name: together\n",
"memory_banks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",

@@ -105,6 +105,7 @@
}
],
"source": [
"# NBVAL_SKIP\n",
"!pip install -U llama-stack"
]
},
@@ -309,12 +310,13 @@
}
],
"source": [
"# NBVAL_SKIP\n",
"!llama stack build --template together --image-type venv"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -328,7 +330,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: `bwrap` is not available. Code interpreter tool will not work correctly.\n"
"Not in Google Colab environment\n",
"\u001b[33mWarning: `bwrap` is not available. Code interpreter tool will not work correctly.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/anaconda3/envs/master/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
@@ -356,63 +367,83 @@
"- safety\n",
"- scoring\n",
"- telemetry\n",
"conda_env: together\n",
"- tool_runtime\n",
"datasets: <span style=\"font-weight: bold\">[]</span>\n",
"docker_image: null\n",
"container_image: null\n",
"eval_tasks: <span style=\"font-weight: bold\">[]</span>\n",
"image_name: together\n",
"memory_banks: <span style=\"font-weight: bold\">[]</span>\n",
"metadata_store:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/root/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">registry.db</span>\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">registry.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
"models:\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8B-Instruct\n",
" model_type: &amp;id001 !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: null\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8B-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70B-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70B-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405B-Instruct-FP8\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405B-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3B-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3B-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11B-Vision-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11B-Vision-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90B-Vision-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90B-Vision-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.3</span>-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.3</span>-70B-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-8B\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-8B\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-11B-Vision\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-11B-Vision-Turbo\n",
"- metadata:\n",
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span>\n",
" model_id: all-MiniLM-L6-v2\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: sentence-transformers\n",
" provider_model_id: null\n",
"providers:\n",
" agents:\n",
" - config:\n",
" persistence_store:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/root/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">agents_store.db</span>\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">agents_store.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: meta-reference\n",
@@ -430,14 +461,17 @@
" provider_type: inline::meta-reference\n",
" inference:\n",
" - config:\n",
" api_key: 4985b03e627419b2964d34b8519ac6c4319f094d1ffb4f45514b4eb87e5427a2\n",
" api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
" url: <span style=\"color: #0000ff; text-decoration-color: #0000ff; text-decoration: underline\">https://api.together.xyz/v1</span>\n",
" provider_id: together\n",
" provider_type: remote::together\n",
" - config: <span style=\"font-weight: bold\">{}</span>\n",
" provider_id: sentence-transformers\n",
" provider_type: inline::sentence-transformers\n",
" memory:\n",
" - config:\n",
" kvstore:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/root/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">faiss_store.db</span>\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">faiss_store.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: faiss\n",
@@ -454,22 +488,52 @@
" provider_id: llm-as-judge\n",
" provider_type: inline::llm-as-judge\n",
" - config:\n",
" openai_api_key: <span style=\"color: #008000; text-decoration-color: #008000\">''</span>\n",
" openai_api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
" provider_id: braintrust\n",
" provider_type: inlin<span style=\"color: #00ff00; text-decoration-color: #00ff00; font-weight: bold\">e::b</span>raintrust\n",
" telemetry:\n",
" - config:\n",
" service_name: llama-stack\n",
" sinks: sqlite\n",
" sqlite_db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/root/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">trace_store.db</span>\n",
" sqlite_db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">trace_store.db</span>\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" tool_runtime:\n",
" - config:\n",
" api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
" max_results: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>\n",
" provider_id: brave-search\n",
" provider_type: remot<span style=\"color: #00ff00; text-decoration-color: #00ff00; font-weight: bold\">e::b</span>rave-search\n",
" - config:\n",
" api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
" max_results: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>\n",
" provider_id: tavily-search\n",
" provider_type: remote::tavily-search\n",
" - config: <span style=\"font-weight: bold\">{}</span>\n",
" provider_id: code-interpreter\n",
" provider_type: inlin<span style=\"color: #00ff00; text-decoration-color: #00ff00; font-weight: bold\">e::c</span>ode-interpreter\n",
" - config: <span style=\"font-weight: bold\">{}</span>\n",
" provider_id: memory-runtime\n",
" provider_type: inline::memory-runtime\n",
"scoring_fns: <span style=\"font-weight: bold\">[]</span>\n",
"shields:\n",
"- params: null\n",
" provider_id: null\n",
" provider_shield_id: null\n",
" shield_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-8B\n",
"tool_groups:\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: tavily-search\n",
" toolgroup_id: builtin::websearch\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: memory-runtime\n",
" toolgroup_id: builtin::memory\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: code-interpreter\n",
" toolgroup_id: builtin::code_interpreter\n",
"version: <span style=\"color: #008000; text-decoration-color: #008000\">'2'</span>\n",
"\n",
"</pre>\n"
@@ -484,63 +548,83 @@
"- safety\n",
"- scoring\n",
"- telemetry\n",
"conda_env: together\n",
"- tool_runtime\n",
"datasets: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"docker_image: null\n",
"container_image: null\n",
"eval_tasks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"image_name: together\n",
"memory_banks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"metadata_store:\n",
" db_path: \u001b[35m/root/.llama/distributions/together/\u001b[0m\u001b[95mregistry.db\u001b[0m\n",
" db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mregistry.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
"models:\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct\n",
" model_type: &id001 !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: null\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-FP8\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision-Turbo\n",
"- metadata:\n",
" embedding_dimension: \u001b[1;36m384\u001b[0m\n",
" model_id: all-MiniLM-L6-v2\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: sentence-transformers\n",
" provider_model_id: null\n",
"providers:\n",
" agents:\n",
" - config:\n",
" persistence_store:\n",
" db_path: \u001b[35m/root/.llama/distributions/together/\u001b[0m\u001b[95magents_store.db\u001b[0m\n",
" db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95magents_store.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: meta-reference\n",
@@ -558,14 +642,17 @@
" provider_type: inline::meta-reference\n",
" inference:\n",
" - config:\n",
" api_key: 4985b03e627419b2964d34b8519ac6c4319f094d1ffb4f45514b4eb87e5427a2\n",
" api_key: \u001b[32m'********'\u001b[0m\n",
" url: \u001b[4;94mhttps://api.together.xyz/v1\u001b[0m\n",
" provider_id: together\n",
" provider_type: remote::together\n",
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" provider_id: sentence-transformers\n",
" provider_type: inline::sentence-transformers\n",
" memory:\n",
" - config:\n",
" kvstore:\n",
" db_path: \u001b[35m/root/.llama/distributions/together/\u001b[0m\u001b[95mfaiss_store.db\u001b[0m\n",
" db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mfaiss_store.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: faiss\n",
@@ -582,58 +669,74 @@
" provider_id: llm-as-judge\n",
" provider_type: inline::llm-as-judge\n",
" - config:\n",
" openai_api_key: \u001b[32m''\u001b[0m\n",
" openai_api_key: \u001b[32m'********'\u001b[0m\n",
" provider_id: braintrust\n",
" provider_type: inlin\u001b[1;92me::b\u001b[0mraintrust\n",
" telemetry:\n",
" - config:\n",
" service_name: llama-stack\n",
" sinks: sqlite\n",
" sqlite_db_path: \u001b[35m/root/.llama/distributions/together/\u001b[0m\u001b[95mtrace_store.db\u001b[0m\n",
" sqlite_db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mtrace_store.db\u001b[0m\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" tool_runtime:\n",
" - config:\n",
" api_key: \u001b[32m'********'\u001b[0m\n",
" max_results: \u001b[1;36m3\u001b[0m\n",
" provider_id: brave-search\n",
" provider_type: remot\u001b[1;92me::b\u001b[0mrave-search\n",
" - config:\n",
" api_key: \u001b[32m'********'\u001b[0m\n",
" max_results: \u001b[1;36m3\u001b[0m\n",
" provider_id: tavily-search\n",
" provider_type: remote::tavily-search\n",
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" provider_id: code-interpreter\n",
" provider_type: inlin\u001b[1;92me::c\u001b[0mode-interpreter\n",
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" provider_id: memory-runtime\n",
" provider_type: inline::memory-runtime\n",
"scoring_fns: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"shields:\n",
"- params: null\n",
" provider_id: null\n",
" provider_shield_id: null\n",
" shield_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
"tool_groups:\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: tavily-search\n",
" toolgroup_id: builtin::websearch\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: memory-runtime\n",
" toolgroup_id: builtin::memory\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: code-interpreter\n",
" toolgroup_id: builtin::code_interpreter\n",
"version: \u001b[32m'2'\u001b[0m\n",
"\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"Model(identifier='meta-llama/Llama-3.1-405B-Instruct', metadata={}, provider_id='together', provider_resource_id='meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo', type='model', model_type='llm')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"\n",
"from google.colab import userdata\n",
"\n",
"os.environ[\"TOGETHER_API_KEY\"] = userdata.get(\"TOGETHER_API_KEY\")\n",
"try:\n",
" from google.colab import userdata\n",
" os.environ['TOGETHER_API_KEY'] = userdata.get('TOGETHER_API_KEY')\n",
" os.environ['TAVILY_SEARCH_API_KEY'] = userdata.get('TAVILY_SEARCH_API_KEY')\n",
"except ImportError:\n",
" print(\"Not in Google Colab environment\")\n",
"\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"together\")\n",
"_ = client.initialize()\n",
"\n",
"# register 405B as LLM Judge model\n",
"client.models.register(\n",
" model_id=\"meta-llama/Llama-3.1-405B-Instruct\",\n",
" provider_model_id=\"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\",\n",
" provider_id=\"together\",\n",
")\n"
"_ = client.initialize()"
]
},
{
@@ -662,7 +765,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {
"id": "TC_IwIAQo4q-"
},
@@ -670,12 +773,12 @@
"source": [
"name = \"llamastack/mmmu\"\n",
"subset = \"Agriculture\"\n",
"split = \"dev\"\n"
"split = \"dev\""
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -788,130 +891,13 @@
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "feb82e061ee44283b4a46be858ef4cd7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"README.md: 0%| | 0.00/36.0k [00:00<?, ?B/s]"
"name": "stderr",
"output_type": "stream",
"text": [
"Generating dev split: 100%|██████████| 5/5 [00:00<00:00, 139.81 examples/s]\n",
"Generating validation split: 100%|██████████| 30/30 [00:00<00:00, 258.29 examples/s]\n",
"Generating test split: 100%|██████████| 287/287 [00:01<00:00, 197.69 examples/s]\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c788d4e9e1e24dca9b6503689df9b631",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"dev-00000-of-00001.parquet: 0%| | 0.00/29.5M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "44f585990aa244d8ba61f892dc1ccc1c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"validation-00000-of-00001.parquet: 0%| | 0.00/165M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4fed5720f30b4b3cbbc606a4f25e223b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"test-00000-of-00003.parquet: 0%| | 0.00/461M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9659140487ca4d3ea799196d2c1ecf61",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"test-00001-of-00003.parquet: 0%| | 0.00/454M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c06f9a090fb54c74b947634bf6d11fa8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"test-00002-of-00003.parquet: 0%| | 0.00/471M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4f788a7920c346f3b42900825bd6711a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating dev split: 0%| | 0/5 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9d2b6eabf7e14436b72bbf374b4a2a0a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating validation split: 0%| | 0/30 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "24e48376a72940679989a39a40bbe7f6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating test split: 0%| | 0/287 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -936,7 +922,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -951,7 +937,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 5/5 [00:51<00:00, 10.28s/it]\n"
"100%|██████████| 5/5 [00:42<00:00, 8.60s/it]\n"
]
},
{
@@ -959,25 +945,21 @@
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">EvaluateResponse</span><span style=\"font-weight: bold\">(</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">generations</span>=<span style=\"font-weight: bold\">[</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'Answer: D'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The Colorado potato beetle (Leptinotarsa decemlineata) is a significant pest of potatoes, causing damage to the leaves and stems of potato plants. The insect with black-colored antennae in the image is a Colorado potato beetle, which is known for its distinctive black and yellow stripes. On the other hand, the insect with tan-colored antennae is not a Colorado potato beetle and does not appear to be a pest of potatoes.\\n\\n*Answer*: B) The one with black coloured antennae'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The image shows a sunflower leaf with small, dark spots and white powdery patches. The dark spots are likely caused by a fungal pathogen, such as rust or septoria leaf spot, while the white powdery patches are likely caused by a fungal pathogen, such as powdery mildew.\\n\\nSince there are two distinct types of lesions on the leaf, it is likely that there are two different pathogens infecting the leaf.\\n\\n**Answer:** B) Two pathogens'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'To determine the count of pathogens infecting this sunflower leaf, we need to analyze the image carefully. The image shows a sunflower leaf with several brown spots and patches on its surface. These brown spots and patches are indicative of fungal infections, which are common pathogens that affect sunflowers.\\n\\nUpon closer inspection, we can see that there are two distinct types of brown spots and patches on the leaf. One type is smaller and more circular in shape, while the other type is larger and more irregular in shape. This suggests that there may be two different pathogens infecting the leaf.\\n\\nHowever, without further information or testing, it is difficult to say for certain whether these two types of brown spots and patches are caused by different pathogens or if they are just different stages of the same infection. Therefore, based on the available information, the most likely answer is:\\n\\nAnswer: B) Two pathogens'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"The question requires the identification of the reason behind the massive gum production on the trunks of grapefruit trees in Cyprus, despite appearing healthy from a distance. The correct answer can be deduced by analyzing the symptoms and considering the possible causes.\\n\\nTo determine the correct answer, let's evaluate each option:\\n\\nA) Don't know or not sure: This option is incorrect because it does not provide a specific reason for the gum production.\\n\\nB) Physiological stress: This option is also incorrect because it is too broad and does not specifically explain the gum production.\\n\\nC) Bacterial disease: This option is incorrect because bacterial diseases typically cause different symptoms such as leaf spots, blights, or wilting.\\n\\nD) Harvesting damage when cutting with knives: This option is incorrect because harvesting damage would likely cause wounds or scars on the tree, but it would not lead to massive gum production.\\n\\nE) Fungal gummosis: This option is the most likely cause of the gum production. Fungal gummosis is a common disease in citrus trees, including grapefruit, that causes the production of gum or sap on the trunks and branches. The disease is typically caused by fungi such as Phytophthora or Diplodia, which infect the tree through wounds or natural openings. The gum production is a defense mechanism by the tree to try to seal off the infection and prevent further damage.\\n\\nTherefore, the correct answer is:\\n\\nAnswer: E\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'Answer: D'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'Based on the image, the most likely reason for the massive gum production on the trunks of these grapefruit trees in Cyprus is a fungal infection. The gummosis, or the production of gum, is a common symptom of fungal diseases in citrus trees, and it can be caused by various factors such as root damage, water stress, or nutrient deficiencies. However, in this case, the presence of the gum on the trunks of the trees suggests that the cause is more likely related to a fungal infection.\\n\\nAnswer: E) Fungal gummosis'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The correct answer is D) Most viruses have a specific relationship with their vectors.\\n\\nExplanation:\\n\\n* Laboratory work with micro manipulators can mimic the transmission of viruses, but this is not the primary method of virus transmission in nature.\\n* Not all plant-feeding insects can transmit viruses; only specific species that have evolved to transmit particular viruses are capable of doing so.\\n* Similarly, not all plant viruses can be transmitted by insects; some are transmitted through other means such as mechanical transmission or nematodes.\\n* The correct assertion is that most viruses have a specific relationship with their vectors, meaning that each virus is typically transmitted by a specific type of insect or vector.\\n\\nAnswer: D'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"The petioles of this rhubarb are splitting, and we need to determine which of the listed issues would not be the cause. \\n\\nFirst, let's consider physiological problems (A). Rhubarb is a hardy plant, but it can still experience physiological issues due to factors like temperature fluctuations, water stress, or nutrient deficiencies. These issues could potentially cause the petioles to split.\\n\\nNext, let's look at phytoplasma infection (B). Phytoplasmas are bacteria-like organisms that can infect plants, causing a range of symptoms including yellowing or browning of leaves, stunted growth, and distorted or split petioles. So, phytoplasma infection could also be a possible cause.\\n\\nNow, let's consider animal damage (D). Animals like rabbits, deer, or rodents might feed on the rhubarb leaves, causing damage to the petioles and potentially leading to splitting.\\n\\nFinally, let's think about bacteria (E). Bacterial infections can cause a range of symptoms in plants, including soft rot, leaf spot, and petiole splitting. So, bacteria could also be a potential cause.\\n\\nBased on this analysis, it seems that all of the listed issues could potentially cause the petioles of this rhubarb to split. Therefore, the correct answer is:\\n\\nAnswer: C\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'**Causes of Splitting Petioles in Rhubarb**\\n\\nThe following factors can cause the petioles of rhubarb to split:\\n\\n* **Physiological Problems**: Issues such as water stress, nutrient deficiencies, or extreme temperatures can lead to splitting.\\n* **Phytoplasma Infection**: A bacterial infection caused by phytoplasma can lead to splitting of the petioles.\\n* **Animal Damage**: Pests like slugs, snails, or rodents can damage the plant and cause splitting.\\n* **Bacterial Infection**: Bacterial infections can also cause splitting.\\n\\nAs a result, the correct answer is:\\n\\n*Answer*: A) Physiological problems'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"font-weight: bold\">]</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">scores</span>=<span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'basic::regex_parser_multiple_choice_answer'</span>: <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">ScoringResult</span><span style=\"font-weight: bold\">(</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">aggregated_results</span>=<span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'accuracy'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.2</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'num_correct'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.0</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'num_total'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5.0</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">aggregated_results</span>=<span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'accuracy'</span>: <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'accuracy'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.2</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'num_correct'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.0</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'num_total'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span><span style=\"font-weight: bold\">}}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">score_rows</span>=<span style=\"font-weight: bold\">[{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.0</span><span style=\"font-weight: bold\">}</span>, <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.0</span><span style=\"font-weight: bold\">}</span>, <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.0</span><span style=\"font-weight: bold\">}</span>, <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.0</span><span style=\"font-weight: bold\">}</span>, <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.0</span><span style=\"font-weight: bold\">}]</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">)</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"font-weight: bold\">}</span>\n",
@@ -987,25 +969,21 @@
"text/plain": [
"\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'Answer: D'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The Colorado potato beetle \u001b[0m\u001b[32m(\u001b[0m\u001b[32mLeptinotarsa decemlineata\u001b[0m\u001b[32m)\u001b[0m\u001b[32m is a significant pest of potatoes, causing damage to the leaves and stems of potato plants. The insect with black-colored antennae in the image is a Colorado potato beetle, which is known for its distinctive black and yellow stripes. On the other hand, the insect with tan-colored antennae is not a Colorado potato beetle and does not appear to be a pest of potatoes.\\n\\n*Answer*: B\u001b[0m\u001b[32m)\u001b[0m\u001b[32m The one with black coloured antennae'\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The image shows a sunflower leaf with small, dark spots and white powdery patches. The dark spots are likely caused by a fungal pathogen, such as rust or septoria leaf spot, while the white powdery patches are likely caused by a fungal pathogen, such as powdery mildew.\\n\\nSince there are two distinct types of lesions on the leaf, it is likely that there are two different pathogens infecting the leaf.\\n\\n**Answer:** B\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Two pathogens'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'To determine the count of pathogens infecting this sunflower leaf, we need to analyze the image carefully. The image shows a sunflower leaf with several brown spots and patches on its surface. These brown spots and patches are indicative of fungal infections, which are common pathogens that affect sunflowers.\\n\\nUpon closer inspection, we can see that there are two distinct types of brown spots and patches on the leaf. One type is smaller and more circular in shape, while the other type is larger and more irregular in shape. This suggests that there may be two different pathogens infecting the leaf.\\n\\nHowever, without further information or testing, it is difficult to say for certain whether these two types of brown spots and patches are caused by different pathogens or if they are just different stages of the same infection. Therefore, based on the available information, the most likely answer is:\\n\\nAnswer: B\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Two pathogens'\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The question requires the identification of the reason behind the massive gum production on the trunks of grapefruit trees in Cyprus, despite appearing healthy from a distance. The correct answer can be deduced by analyzing the symptoms and considering the possible causes.\\n\\nTo determine the correct answer, let's evaluate each option:\\n\\nA\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Don't know or not sure: This option is incorrect because it does not provide a specific reason for the gum production.\\n\\nB\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Physiological stress: This option is also incorrect because it is too broad and does not specifically explain the gum production.\\n\\nC\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Bacterial disease: This option is incorrect because bacterial diseases typically cause different symptoms such as leaf spots, blights, or wilting.\\n\\nD\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Harvesting damage when cutting with knives: This option is incorrect because harvesting damage would likely cause wounds or scars on the tree, but it would not lead to massive gum production.\\n\\nE\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Fungal gummosis: This option is the most likely cause of the gum production. Fungal gummosis is a common disease in citrus trees, including grapefruit, that causes the production of gum or sap on the trunks and branches. The disease is typically caused by fungi such as Phytophthora or Diplodia, which infect the tree through wounds or natural openings. The gum production is a defense mechanism by the tree to try to seal off the infection and prevent further damage.\\n\\nTherefore, the correct answer is:\\n\\nAnswer: E\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'Answer: D'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'Based on the image, the most likely reason for the massive gum production on the trunks of these grapefruit trees in Cyprus is a fungal infection. The gummosis, or the production of gum, is a common symptom of fungal diseases in citrus trees, and it can be caused by various factors such as root damage, water stress, or nutrient deficiencies. However, in this case, the presence of the gum on the trunks of the trees suggests that the cause is more likely related to a fungal infection.\\n\\nAnswer: E\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Fungal gummosis'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The correct answer is D\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Most viruses have a specific relationship with their vectors.\\n\\nExplanation:\\n\\n* Laboratory work with micro manipulators can mimic the transmission of viruses, but this is not the primary method of virus transmission in nature.\\n* Not all plant-feeding insects can transmit viruses; only specific species that have evolved to transmit particular viruses are capable of doing so.\\n* Similarly, not all plant viruses can be transmitted by insects; some are transmitted through other means such as mechanical transmission or nematodes.\\n* The correct assertion is that most viruses have a specific relationship with their vectors, meaning that each virus is typically transmitted by a specific type of insect or vector.\\n\\nAnswer: D'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The petioles of this rhubarb are splitting, and we need to determine which of the listed issues would not be the cause. \\n\\nFirst, let's consider physiological problems \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Rhubarb is a hardy plant, but it can still experience physiological issues due to factors like temperature fluctuations, water stress, or nutrient deficiencies. These issues could potentially cause the petioles to split.\\n\\nNext, let's look at phytoplasma infection \u001b[0m\u001b[32m(\u001b[0m\u001b[32mB\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Phytoplasmas are bacteria-like organisms that can infect plants, causing a range of symptoms including yellowing or browning of leaves, stunted growth, and distorted or split petioles. So, phytoplasma infection could also be a possible cause.\\n\\nNow, let's consider animal damage \u001b[0m\u001b[32m(\u001b[0m\u001b[32mD\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Animals like rabbits, deer, or rodents might feed on the rhubarb leaves, causing damage to the petioles and potentially leading to splitting.\\n\\nFinally, let's think about bacteria \u001b[0m\u001b[32m(\u001b[0m\u001b[32mE\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Bacterial infections can cause a range of symptoms in plants, including soft rot, leaf spot, and petiole splitting. So, bacteria could also be a potential cause.\\n\\nBased on this analysis, it seems that all of the listed issues could potentially cause the petioles of this rhubarb to split. Therefore, the correct answer is:\\n\\nAnswer: C\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'**Causes of Splitting Petioles in Rhubarb**\\n\\nThe following factors can cause the petioles of rhubarb to split:\\n\\n* **Physiological Problems**: Issues such as water stress, nutrient deficiencies, or extreme temperatures can lead to splitting.\\n* **Phytoplasma Infection**: A bacterial infection caused by phytoplasma can lead to splitting of the petioles.\\n* **Animal Damage**: Pests like slugs, snails, or rodents can damage the plant and cause splitting.\\n* **Bacterial Infection**: Bacterial infections can also cause splitting.\\n\\nAs a result, the correct answer is:\\n\\n*Answer*: A\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Physiological problems'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n",
"\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[32m'basic::regex_parser_multiple_choice_answer'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'accuracy'\u001b[0m: \u001b[1;36m0.2\u001b[0m, \u001b[32m'num_correct'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'num_total'\u001b[0m: \u001b[1;36m5.0\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'accuracy'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'accuracy'\u001b[0m: \u001b[1;36m0.2\u001b[0m, \u001b[32m'num_correct'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'num_total'\u001b[0m: \u001b[1;36m5\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m1.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.0\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n",
@@ -1056,7 +1034,9 @@
" \"model\": \"meta-llama/Llama-3.2-90B-Vision-Instruct\",\n",
" \"sampling_params\": {\n",
" \"strategy\": {\n",
" \"type\": \"greedy\",\n",
" \"type\": \"top_p\",\n",
" \"temperature\": 1.0,\n",
" \"top_p\": 0.95,\n",
" },\n",
" \"max_tokens\": 4096,\n",
" \"repeat_penalty\": 1.0,\n",
@@ -1081,7 +1061,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {
"id": "HXmZf3Ymw-aX"
},
@@ -1108,7 +1088,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {
"id": "Gc8azb4Rxr5J"
},
@@ -1122,7 +1102,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -1136,7 +1116,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 5/5 [00:48<00:00, 9.68s/it]\n"
" 0%| | 0/5 [00:00<?, ?it/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 5/5 [00:31<00:00, 6.38s/it]\n"
]
},
{
@@ -1144,29 +1131,25 @@
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">EvaluateResponse</span><span style=\"font-weight: bold\">(</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">generations</span>=<span style=\"font-weight: bold\">[</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The recipient of the IEEE Frank Rosenblatt Award in 2010 was Vladimir Vapnik'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I'm not sure who received the IEEE Frank Rosenblatt Award in 2010.\"</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I'm not aware of the information about the 2018 Jerlov Award recipient.\"</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I am unable to verify who was awarded the Oceanography Society's Jerlov Award in 2018.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"Radcliffe College was a women's liberal arts college in Cambridge, Massachusetts. However, it merged with Harvard University in 1977 and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'I do not have information on the Leipzig 1877 tournament.'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"Radcliffe College was a women's liberal arts college, but it has since been integrated into Harvard University.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"The Leipzig 1877 tournament was organized in the honor of 50th anniversary of the first chess club in Germany (the Leipzig Chess Club's) founding and of the 50th anniversary of Paul Morphy's birth\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"Karl Küchler's 1908 guidebook states that Empress Elizabeth of Austria's favorite sculpture, which was made for her villa Achilleion at Corfu, depicted 'Dying Achilles'.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I am unable to verify what Empress Elizabeth of Austria's favorite sculpture depicted at her villa Achilleion at Corfu, according to Karl Küchler.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"font-weight: bold\">]</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">scores</span>=<span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'llm-as-judge::405b-simpleqa'</span>: <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">ScoringResult</span><span style=\"font-weight: bold\">(</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">aggregated_results</span>=<span style=\"font-weight: bold\">{}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">score_rows</span>=<span style=\"font-weight: bold\">[</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'A'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'A'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span><span style=\"font-weight: bold\">}</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span><span style=\"font-weight: bold\">}</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"font-weight: bold\">]</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">)</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"font-weight: bold\">}</span>\n",
@ -1176,29 +1159,25 @@
"text/plain": [
"\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The recipient of the IEEE Frank Rosenblatt Award in 2010 was Vladimir Vapnik'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I'm not sure who received the IEEE Frank Rosenblatt Award in 2010.\"\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I'm not aware of the information about the 2018 Jerlov Award recipient.\"\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I am unable to verify who was awarded the Oceanography Society's Jerlov Award in 2018.\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Radcliffe College was a women's liberal arts college in Cambridge, Massachusetts. However, it merged with Harvard University in 1977 and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'I do not have information on the Leipzig 1877 tournament.'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Radcliffe College was a women's liberal arts college, but it has since been integrated into Harvard University.\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The Leipzig 1877 tournament was organized in the honor of 50th anniversary of the first chess club in Germany \u001b[0m\u001b[32m(\u001b[0m\u001b[32mthe Leipzig Chess Club's\u001b[0m\u001b[32m)\u001b[0m\u001b[32m founding and of the 50th anniversary of Paul Morphy's birth\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Karl Küchler's 1908 guidebook states that Empress Elizabeth of Austria's favorite sculpture, which was made for her villa Achilleion at Corfu, depicted 'Dying Achilles'.\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I am unable to verify what Empress Elizabeth of Austria's favorite sculpture depicted at her villa Achilleion at Corfu, according to Karl Küchler.\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n",
"\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[32m'llm-as-judge::405b-simpleqa'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'A'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'A'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n",
@ -1210,6 +1189,13 @@
}
],
"source": [
"# register 405B as LLM Judge model\n",
"client.models.register(\n",
" model_id=\"meta-llama/Llama-3.1-405B-Instruct\",\n",
" provider_model_id=\"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\",\n",
" provider_id=\"together\",\n",
")\n",
"\n",
"client.eval_tasks.register(\n",
" eval_task_id=\"meta-reference::simpleqa\",\n",
" dataset_id=simpleqa_dataset_id,\n",
@ -1257,7 +1243,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@ -1271,7 +1257,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"5it [00:26, 5.29s/it]\n"
"5it [00:06, 1.33s/it]\n"
]
},
{
@ -1280,27 +1266,25 @@
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">EvaluateResponse</span><span style=\"font-weight: bold\">(</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">generations</span>=<span style=\"font-weight: bold\">[</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I'm sorry but I cannot find the recipient of the IEEE Frank Rosenblatt Award in 2010.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The IEEE Frank Rosenblatt Award was given to Professor John Shawe-Taylor in 2010 for his contributions to the foundations of kernel methods.'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I'm not sure who was awarded the Oceanography Society's Jerlov Award in 2018. Let me search for the information.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The Jerlov Award is given by The Oceanography Society to recognize outstanding contributions to the field of ocean optics. The 2018 Jerlov Award was awarded to Dr. Kendall L. Carder.'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"The women's liberal arts college in Cambridge, Massachusetts is called Radcliffe College. However, in 1999, it merged with Harvard University and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"The women's liberal arts college in Cambridge, Massachusetts is Radcliffe College. However, in 1999, Radcliffe College merged with Harvard University to form the Radcliffe Institute for Advanced Study at Harvard University. The institute is still located in Cambridge, Massachusetts, and is dedicated to supporting women's education and research.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The Leipzig 1877 tournament was organized in honor of Adolf Anderssen.'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The 1877 Leipzig tournament was organized in honor of Anderssen, a German chess master.'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"Empress Elizabeth of Austria's favorite sculpture, made for her villa Achilleion at Corfu, depicted Achilles.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"According to Karl Küchler, Empress Elizabeth of Austria's favorite sculpture, which was made for her villa Achilleion at Corfu, depicted the Dying Achilles.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"font-weight: bold\">]</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">scores</span>=<span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'llm-as-judge::405b-simpleqa'</span>: <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">ScoringResult</span><span style=\"font-weight: bold\">(</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">aggregated_results</span>=<span style=\"font-weight: bold\">{}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">score_rows</span>=<span style=\"font-weight: bold\">[</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C.'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'A'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'A'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'A'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'A'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span><span style=\"font-weight: bold\">}</span>\n",
@ -1314,27 +1298,25 @@
"\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I'm sorry but I cannot find the recipient of the IEEE Frank Rosenblatt Award in 2010.\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The IEEE Frank Rosenblatt Award was given to Professor John Shawe-Taylor in 2010 for his contributions to the foundations of kernel methods.'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I'm not sure who was awarded the Oceanography Society's Jerlov Award in 2018. Let me search for the information.\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The Jerlov Award is given by The Oceanography Society to recognize outstanding contributions to the field of ocean optics. The 2018 Jerlov Award was awarded to Dr. Kendall L. Carder.'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The women's liberal arts college in Cambridge, Massachusetts is called Radcliffe College. However, in 1999, it merged with Harvard University and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The women's liberal arts college in Cambridge, Massachusetts is Radcliffe College. However, in 1999, Radcliffe College merged with Harvard University to form the Radcliffe Institute for Advanced Study at Harvard University. The institute is still located in Cambridge, Massachusetts, and is dedicated to supporting women's education and research.\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The Leipzig 1877 tournament was organized in honor of Adolf Anderssen.'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The 1877 Leipzig tournament was organized in honor of Anderssen, a German chess master.'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Empress Elizabeth of Austria's favorite sculpture, made for her villa Achilleion at Corfu, depicted Achilles.\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"According to Karl Küchler, Empress Elizabeth of Austria's favorite sculpture, which was made for her villa Achilleion at Corfu, depicted the Dying Achilles.\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n",
"\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[32m'llm-as-judge::405b-simpleqa'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C.'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'A'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'A'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'A'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'A'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m\n",
@ -1350,15 +1332,17 @@
],
"source": [
"agent_config = {\n",
" \"model\": \"meta-llama/Llama-3.1-405B-Instruct\",\n",
" \"instructions\": \"You are a helpful assistant\",\n",
" \"sampling_params\": {\"strategy\": {\"type\": \"greedy\"}},\n",
" \"tools\": [\n",
" {\n",
" \"type\": \"brave_search\",\n",
" \"engine\": \"tavily\",\n",
" \"api_key\": userdata.get(\"TAVILY_SEARCH_API_KEY\"),\n",
" \"model\": \"meta-llama/Llama-3.3-70B-Instruct\",\n",
" \"instructions\": \"You are a helpful assistant that have access to tool to search the web. \",\n",
" \"sampling_params\": {\n",
" \"strategy\": {\n",
" \"type\": \"top_p\",\n",
" \"temperature\": 0.5,\n",
" \"top_p\": 0.9,\n",
" }\n",
" },\n",
" \"toolgroups\": [\n",
" \"builtin::websearch\",\n",
" ],\n",
" \"tool_choice\": \"auto\",\n",
" \"tool_prompt_format\": \"json\",\n",
@ -1381,6 +1365,13 @@
")\n",
"pprint(response)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@ -1396,7 +1387,16 @@
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {

View file

@ -760,7 +760,7 @@
"- tool_runtime\n",
"conda_env: together\n",
"datasets: <span style=\"font-weight: bold\">[]</span>\n",
"docker_image: null\n",
"container_image: null\n",
"eval_tasks: <span style=\"font-weight: bold\">[]</span>\n",
"image_name: together\n",
"memory_banks: <span style=\"font-weight: bold\">[]</span>\n",
@ -942,7 +942,7 @@
"- tool_runtime\n",
"conda_env: together\n",
"datasets: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"docker_image: null\n",
"container_image: null\n",
"eval_tasks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"image_name: together\n",
"memory_banks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",

View file

@ -6067,6 +6067,76 @@
"step"
]
},
"AgentTurnInputType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "agent_turn_input",
"default": "agent_turn_input"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
"ArrayType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "array",
"default": "array"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
"BooleanType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "boolean",
"default": "boolean"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
"ChatCompletionInputType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "chat_completion_input",
"default": "chat_completion_input"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
"CompletionInputType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "completion_input",
"default": "completion_input"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
"Dataset": {
"type": "object",
"properties": {
@ -6130,79 +6200,7 @@
"metadata"
]
},
"ParamType": {
"oneOf": [
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "string",
"default": "string"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "number",
"default": "number"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "boolean",
"default": "boolean"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "array",
"default": "array"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "object",
"default": "object"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
{
"JsonType": {
"type": "object",
"properties": {
"type": {
@ -6216,7 +6214,83 @@
"type"
]
},
"NumberType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "number",
"default": "number"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
"ObjectType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "object",
"default": "object"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
"ParamType": {
"oneOf": [
{
"$ref": "#/components/schemas/StringType"
},
{
"$ref": "#/components/schemas/NumberType"
},
{
"$ref": "#/components/schemas/BooleanType"
},
{
"$ref": "#/components/schemas/ArrayType"
},
{
"$ref": "#/components/schemas/ObjectType"
},
{
"$ref": "#/components/schemas/JsonType"
},
{
"$ref": "#/components/schemas/UnionType"
},
{
"$ref": "#/components/schemas/ChatCompletionInputType"
},
{
"$ref": "#/components/schemas/CompletionInputType"
},
{
"$ref": "#/components/schemas/AgentTurnInputType"
}
]
},
"StringType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "string",
"default": "string"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
"UnionType": {
"type": "object",
"properties": {
"type": {
@ -6230,50 +6304,6 @@
"type"
]
},
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "chat_completion_input",
"default": "chat_completion_input"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "completion_input",
"default": "completion_input"
}
},
"additionalProperties": false,
"required": [
"type"
]
},
{
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "agent_turn_input",
"default": "agent_turn_input"
}
},
"additionalProperties": false,
"required": [
"type"
]
}
]
},
"EvalTask": {
"type": "object",
"properties": {
@ -8922,6 +8952,10 @@
"name": "AgentTool",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgentTool\" />"
},
{
"name": "AgentTurnInputType",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgentTurnInputType\" />"
},
{
"name": "AgentTurnResponseEvent",
"description": "Streamed agent execution response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/AgentTurnResponseEvent\" />"
@ -8965,6 +8999,10 @@
"name": "AppendRowsRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/AppendRowsRequest\" />"
},
{
"name": "ArrayType",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ArrayType\" />"
},
{
"name": "BasicScoringFnParams",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BasicScoringFnParams\" />"
@ -8992,6 +9030,10 @@
"name": "BenchmarkEvalTaskConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BenchmarkEvalTaskConfig\" />"
},
{
"name": "BooleanType",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BooleanType\" />"
},
{
"name": "BuiltinTool",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
@ -9000,6 +9042,10 @@
"name": "CancelTrainingJobRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/CancelTrainingJobRequest\" />"
},
{
"name": "ChatCompletionInputType",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionInputType\" />"
},
{
"name": "ChatCompletionRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionRequest\" />"
@ -9024,6 +9070,10 @@
"name": "Checkpoint",
"description": "Checkpoint created during training runs\n\n<SchemaDefinition schemaRef=\"#/components/schemas/Checkpoint\" />"
},
{
"name": "CompletionInputType",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionInputType\" />"
},
{
"name": "CompletionMessage",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionMessage\" />"
@ -9166,6 +9216,10 @@
"name": "JobStatus",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/JobStatus\" />"
},
{
"name": "JsonType",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/JsonType\" />"
},
{
"name": "KeyValueMemoryBank",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/KeyValueMemoryBank\" />"
@ -9283,6 +9337,14 @@
{
"name": "Models"
},
{
"name": "NumberType",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/NumberType\" />"
},
{
"name": "ObjectType",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ObjectType\" />"
},
{
"name": "OptimizerConfig",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/OptimizerConfig\" />"
@ -9490,6 +9552,10 @@
"name": "StopReason",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/StopReason\" />"
},
{
"name": "StringType",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/StringType\" />"
},
{
"name": "StructuredLogEvent",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/StructuredLogEvent\" />"
@ -9622,6 +9688,10 @@
"name": "URL",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/URL\" />"
},
{
"name": "UnionType",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/UnionType\" />"
},
{
"name": "UnstructuredLogEvent",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/UnstructuredLogEvent\" />"
@ -9682,6 +9752,7 @@
"AgentSessionCreateResponse",
"AgentStepResponse",
"AgentTool",
"AgentTurnInputType",
"AgentTurnResponseEvent",
"AgentTurnResponseStepCompletePayload",
"AgentTurnResponseStepProgressPayload",
@ -9692,20 +9763,24 @@
"AggregationFunctionType",
"AppEvalTaskConfig",
"AppendRowsRequest",
"ArrayType",
"BasicScoringFnParams",
"BatchChatCompletionRequest",
"BatchChatCompletionResponse",
"BatchCompletionRequest",
"BatchCompletionResponse",
"BenchmarkEvalTaskConfig",
"BooleanType",
"BuiltinTool",
"CancelTrainingJobRequest",
"ChatCompletionInputType",
"ChatCompletionRequest",
"ChatCompletionResponse",
"ChatCompletionResponseEvent",
"ChatCompletionResponseEventType",
"ChatCompletionResponseStreamChunk",
"Checkpoint",
"CompletionInputType",
"CompletionMessage",
"CompletionRequest",
"CompletionResponse",
@ -9737,6 +9812,7 @@
"InvokeToolRequest",
"Job",
"JobStatus",
"JsonType",
"KeyValueMemoryBank",
"KeyValueMemoryBankParams",
"KeywordMemoryBank",
@ -9764,6 +9840,8 @@
"Model",
"ModelCandidate",
"ModelType",
"NumberType",
"ObjectType",
"OptimizerConfig",
"OptimizerType",
"PaginatedRowsResult",
@ -9812,6 +9890,7 @@
"SpanStatus",
"SpanWithStatus",
"StopReason",
"StringType",
"StructuredLogEvent",
"SupervisedFineTuneRequest",
"SyntheticDataGenerateRequest",
@ -9842,6 +9921,7 @@
"TrainingConfig",
"Turn",
"URL",
"UnionType",
"UnstructuredLogEvent",
"UserMessage",
"VectorMemoryBank",

View file

@ -105,6 +105,16 @@ components:
- name
- args
type: object
AgentTurnInputType:
additionalProperties: false
properties:
type:
const: agent_turn_input
default: agent_turn_input
type: string
required:
- type
type: object
AgentTurnResponseEvent:
additionalProperties: false
properties:
@ -290,6 +300,16 @@ components:
- dataset_id
- rows
type: object
ArrayType:
additionalProperties: false
properties:
type:
const: array
default: array
type: string
required:
- type
type: object
BasicScoringFnParams:
additionalProperties: false
properties:
@ -395,6 +415,16 @@ components:
- type
- eval_candidate
type: object
BooleanType:
additionalProperties: false
properties:
type:
const: boolean
default: boolean
type: string
required:
- type
type: object
BuiltinTool:
enum:
- brave_search
@ -410,6 +440,16 @@ components:
required:
- job_uuid
type: object
ChatCompletionInputType:
additionalProperties: false
properties:
type:
const: chat_completion_input
default: chat_completion_input
type: string
required:
- type
type: object
ChatCompletionRequest:
additionalProperties: false
properties:
@ -492,6 +532,16 @@ components:
type: object
Checkpoint:
description: Checkpoint created during training runs
CompletionInputType:
additionalProperties: false
properties:
type:
const: completion_input
default: completion_input
type: string
required:
- type
type: object
CompletionMessage:
additionalProperties: false
properties:
@ -1007,6 +1057,16 @@ components:
- failed
- scheduled
type: string
JsonType:
additionalProperties: false
properties:
type:
const: json
default: json
type: string
required:
- type
type: object
KeyValueMemoryBank:
additionalProperties: false
properties:
@ -1440,6 +1500,26 @@ components:
- llm
- embedding
type: string
NumberType:
additionalProperties: false
properties:
type:
const: number
default: number
type: string
required:
- type
type: object
ObjectType:
additionalProperties: false
properties:
type:
const: object
default: object
type: string
required:
- type
type: object
OptimizerConfig:
additionalProperties: false
properties:
@ -1488,96 +1568,16 @@ components:
type: object
ParamType:
oneOf:
- additionalProperties: false
properties:
type:
const: string
default: string
type: string
required:
- type
type: object
- additionalProperties: false
properties:
type:
const: number
default: number
type: string
required:
- type
type: object
- additionalProperties: false
properties:
type:
const: boolean
default: boolean
type: string
required:
- type
type: object
- additionalProperties: false
properties:
type:
const: array
default: array
type: string
required:
- type
type: object
- additionalProperties: false
properties:
type:
const: object
default: object
type: string
required:
- type
type: object
- additionalProperties: false
properties:
type:
const: json
default: json
type: string
required:
- type
type: object
- additionalProperties: false
properties:
type:
const: union
default: union
type: string
required:
- type
type: object
- additionalProperties: false
properties:
type:
const: chat_completion_input
default: chat_completion_input
type: string
required:
- type
type: object
- additionalProperties: false
properties:
type:
const: completion_input
default: completion_input
type: string
required:
- type
type: object
- additionalProperties: false
properties:
type:
const: agent_turn_input
default: agent_turn_input
type: string
required:
- type
type: object
- $ref: '#/components/schemas/StringType'
- $ref: '#/components/schemas/NumberType'
- $ref: '#/components/schemas/BooleanType'
- $ref: '#/components/schemas/ArrayType'
- $ref: '#/components/schemas/ObjectType'
- $ref: '#/components/schemas/JsonType'
- $ref: '#/components/schemas/UnionType'
- $ref: '#/components/schemas/ChatCompletionInputType'
- $ref: '#/components/schemas/CompletionInputType'
- $ref: '#/components/schemas/AgentTurnInputType'
PostTrainingJob:
additionalProperties: false
properties:
@ -2479,6 +2479,16 @@ components:
- end_of_message
- out_of_tokens
type: string
StringType:
additionalProperties: false
properties:
type:
const: string
default: string
type: string
required:
- type
type: object
StructuredLogEvent:
additionalProperties: false
properties:
@ -3131,6 +3141,16 @@ components:
required:
- uri
type: object
UnionType:
additionalProperties: false
properties:
type:
const: union
default: union
type: string
required:
- type
type: object
UnstructuredLogEvent:
additionalProperties: false
properties:
@ -5588,6 +5608,9 @@ tags:
name: AgentStepResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/AgentTool" />
name: AgentTool
- description: <SchemaDefinition schemaRef="#/components/schemas/AgentTurnInputType"
/>
name: AgentTurnInputType
- description: 'Streamed agent execution response.
@ -5624,6 +5647,8 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/AppendRowsRequest"
/>
name: AppendRowsRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/ArrayType" />
name: ArrayType
- description: <SchemaDefinition schemaRef="#/components/schemas/BasicScoringFnParams"
/>
name: BasicScoringFnParams
@ -5643,11 +5668,16 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/BenchmarkEvalTaskConfig"
/>
name: BenchmarkEvalTaskConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/BooleanType" />
name: BooleanType
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
name: BuiltinTool
- description: <SchemaDefinition schemaRef="#/components/schemas/CancelTrainingJobRequest"
/>
name: CancelTrainingJobRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionInputType"
/>
name: ChatCompletionInputType
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
/>
name: ChatCompletionRequest
@ -5676,6 +5706,9 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/Checkpoint" />'
name: Checkpoint
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionInputType"
/>
name: CompletionInputType
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
/>
name: CompletionMessage
@ -5770,6 +5803,8 @@ tags:
name: Job
- description: <SchemaDefinition schemaRef="#/components/schemas/JobStatus" />
name: JobStatus
- description: <SchemaDefinition schemaRef="#/components/schemas/JsonType" />
name: JsonType
- description: <SchemaDefinition schemaRef="#/components/schemas/KeyValueMemoryBank"
/>
name: KeyValueMemoryBank
@ -5847,6 +5882,10 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/ModelType" />
name: ModelType
- name: Models
- description: <SchemaDefinition schemaRef="#/components/schemas/NumberType" />
name: NumberType
- description: <SchemaDefinition schemaRef="#/components/schemas/ObjectType" />
name: ObjectType
- description: <SchemaDefinition schemaRef="#/components/schemas/OptimizerConfig"
/>
name: OptimizerConfig
@ -5988,6 +6027,8 @@ tags:
name: SpanWithStatus
- description: <SchemaDefinition schemaRef="#/components/schemas/StopReason" />
name: StopReason
- description: <SchemaDefinition schemaRef="#/components/schemas/StringType" />
name: StringType
- description: <SchemaDefinition schemaRef="#/components/schemas/StructuredLogEvent"
/>
name: StructuredLogEvent
@ -6080,6 +6121,8 @@ tags:
name: Turn
- description: <SchemaDefinition schemaRef="#/components/schemas/URL" />
name: URL
- description: <SchemaDefinition schemaRef="#/components/schemas/UnionType" />
name: UnionType
- description: <SchemaDefinition schemaRef="#/components/schemas/UnstructuredLogEvent"
/>
name: UnstructuredLogEvent
@ -6126,6 +6169,7 @@ x-tagGroups:
- AgentSessionCreateResponse
- AgentStepResponse
- AgentTool
- AgentTurnInputType
- AgentTurnResponseEvent
- AgentTurnResponseStepCompletePayload
- AgentTurnResponseStepProgressPayload
@ -6136,20 +6180,24 @@ x-tagGroups:
- AggregationFunctionType
- AppEvalTaskConfig
- AppendRowsRequest
- ArrayType
- BasicScoringFnParams
- BatchChatCompletionRequest
- BatchChatCompletionResponse
- BatchCompletionRequest
- BatchCompletionResponse
- BenchmarkEvalTaskConfig
- BooleanType
- BuiltinTool
- CancelTrainingJobRequest
- ChatCompletionInputType
- ChatCompletionRequest
- ChatCompletionResponse
- ChatCompletionResponseEvent
- ChatCompletionResponseEventType
- ChatCompletionResponseStreamChunk
- Checkpoint
- CompletionInputType
- CompletionMessage
- CompletionRequest
- CompletionResponse
@ -6181,6 +6229,7 @@ x-tagGroups:
- InvokeToolRequest
- Job
- JobStatus
- JsonType
- KeyValueMemoryBank
- KeyValueMemoryBankParams
- KeywordMemoryBank
@ -6208,6 +6257,8 @@ x-tagGroups:
- Model
- ModelCandidate
- ModelType
- NumberType
- ObjectType
- OptimizerConfig
- OptimizerType
- PaginatedRowsResult
@ -6256,6 +6307,7 @@ x-tagGroups:
- SpanStatus
- SpanWithStatus
- StopReason
- StringType
- StructuredLogEvent
- SupervisedFineTuneRequest
- SyntheticDataGenerateRequest
@ -6286,6 +6338,7 @@ x-tagGroups:
- TrainingConfig
- Turn
- URL
- UnionType
- UnstructuredLogEvent
- UserMessage
- VectorMemoryBank

View file

@ -17,13 +17,13 @@ pip install -e .
llama stack build -h
```
We will start building our distribution (in the form of a Conda environment, or Docker image). In this step, we will specify:
We will start building our distribution (in the form of a Conda environment, or Container image). In this step, we will specify:
- `name`: the name for our distribution (e.g. `my-stack`)
- `image_type`: our build image type (`conda | docker`)
- `image_type`: our build image type (`conda | container`)
- `distribution_spec`: our distribution specs for specifying API providers
- `description`: a short description of the configurations for the distribution
- `providers`: specifies the underlying implementation for serving each API endpoint
- `image_type`: `conda` | `docker` to specify whether to build the distribution in the form of Docker image or Conda environment.
- `image_type`: `conda` | `container` to specify whether to build the distribution in the form of Container image or Conda environment.
After this step is complete, a file named `<name>-build.yaml` and template file `<name>-run.yaml` will be generated and saved at the output file path specified at the end of the command.
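For reference, a minimal `<name>-build.yaml` could look like the following sketch (the provider choice and values here are illustrative, not canonical):

```
name: my-stack
distribution_spec:
  description: Local stack with Ollama inference
  providers:
    inference: remote::ollama
image_type: conda
```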
@ -35,7 +35,7 @@ After this step is complete, a file named `<name>-build.yaml` and template file
llama stack build
> Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
> Enter the image type you want your Llama Stack to be built as (docker or conda): conda
> Enter the image type you want your Llama Stack to be built as (container or conda): conda
Llama Stack is composed of several APIs working together. Let's select
the provider types (implementations) you want to use for these APIs.
@ -348,26 +348,26 @@ llama stack build --config llama_stack/templates/ollama/build.yaml
```
:::
:::{tab-item} Building Docker
:::{tab-item} Building Container
> [!TIP]
> Podman is supported as an alternative to Docker. Set `DOCKER_BINARY` to `podman` in your environment to use Podman.
> Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman.
To build a docker image, you may start off from a template and use the `--image-type docker` flag to specify `docker` as the build image type.
To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.
```
llama stack build --template ollama --image-type docker
llama stack build --template ollama --image-type container
```
```
$ llama stack build --template ollama --image-type docker
$ llama stack build --template ollama --image-type container
...
Dockerfile created successfully in /tmp/tmp.viA3a3Rdsg/Dockerfile
FROM python:3.10-slim
Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/Containerfile
FROM python:3.10-slim
...
You can now edit ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml and run `llama stack run ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml`
```
After this step is successful, you should be able to find the built docker image and test it with `llama stack run <path/to/run.yaml>`.
After this step is successful, you should be able to find the built container image and test it with `llama stack run <path/to/run.yaml>`.
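For instance, reusing the run config path from the build output above:

```
llama stack run ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml
```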
:::
::::

View file

@ -12,7 +12,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
### Environment Variables

View file

@ -19,7 +19,7 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro
| safety | `remote::bedrock` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |

View file

@ -22,7 +22,7 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
### Environment Variables

View file

@ -22,7 +22,7 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
Note that you need access to nvidia GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs.

View file

@ -22,7 +22,7 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
The only difference vs. the `meta-reference-gpu` distribution is that it has support for more efficient inference -- with fp8, int4 quantization, etc.

View file

@ -14,11 +14,14 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::vllm` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference.

View file

@ -23,7 +23,7 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
You can use this distribution if you have GPUs and want to run an independent TGI server container for running inference.

View file

@ -22,7 +22,7 @@ The `llamastack/distribution-together` distribution consists of the following pr
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
### Environment Variables

View file

@ -1,6 +1,6 @@
# Quick Start
In this guide, we'll through how you can use the Llama Stack client SDK to build a simple RAG agent.
In this guide, we'll walk through how you can use the Llama Stack client SDK to build a simple RAG agent.
The most critical requirement for the agent is the ability to run inference on the underlying Llama model. Depending on what hardware (GPUs) you have available, you have various options. We will use `Ollama` for this purpose, as it is the easiest to get started with while still being robust.

View file

@ -6,54 +6,65 @@
from typing import Literal, Union
from llama_models.schema_utils import register_schema
from llama_models.schema_utils import json_schema_type, register_schema
from pydantic import BaseModel, Field
from typing_extensions import Annotated
@json_schema_type
class StringType(BaseModel):
type: Literal["string"] = "string"
@json_schema_type
class NumberType(BaseModel):
type: Literal["number"] = "number"
@json_schema_type
class BooleanType(BaseModel):
type: Literal["boolean"] = "boolean"
@json_schema_type
class ArrayType(BaseModel):
type: Literal["array"] = "array"
@json_schema_type
class ObjectType(BaseModel):
type: Literal["object"] = "object"
@json_schema_type
class JsonType(BaseModel):
type: Literal["json"] = "json"
@json_schema_type
class UnionType(BaseModel):
type: Literal["union"] = "union"
@json_schema_type
class ChatCompletionInputType(BaseModel):
# expects List[Message] for messages
type: Literal["chat_completion_input"] = "chat_completion_input"
@json_schema_type
class CompletionInputType(BaseModel):
# expects InterleavedTextMedia for content
type: Literal["completion_input"] = "completion_input"
@json_schema_type
class AgentTurnInputType(BaseModel):
# expects List[Message] for messages (may also include attachments?)
type: Literal["agent_turn_input"] = "agent_turn_input"
@json_schema_type
class DialogType(BaseModel):
# expects List[Message] for messages
# this type semantically contains the output label whereas ChatCompletionInputType does not
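
These `Literal`-tagged models are meant to compose into a discriminated union that pydantic can dispatch on. A minimal sketch of the pattern, where `ParamType` and `Column` are illustrative names rather than definitions from this file:

from typing import Literal, Union

from pydantic import BaseModel, Field
from typing_extensions import Annotated

class StringType(BaseModel):
    type: Literal["string"] = "string"

class NumberType(BaseModel):
    type: Literal["number"] = "number"

# Validation picks the right member by inspecting the "type" discriminator.
ParamType = Annotated[Union[StringType, NumberType], Field(discriminator="type")]

class Column(BaseModel):
    name: str
    dtype: ParamType

col = Column(name="score", dtype={"type": "number"})
assert isinstance(col.dtype, NumberType)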

View file

@@ -182,8 +182,8 @@ def _generate_run_config(
"""
apis = list(build_config.distribution_spec.providers.keys())
run_config = StackRunConfig(
docker_image=(
image_name if build_config.image_type == ImageType.docker.value else None
container_image=(
image_name if build_config.image_type == ImageType.container.value else None
),
image_name=image_name,
apis=apis,
@@ -238,7 +238,7 @@ def _run_stack_build_command_from_build_config(
image_name: Optional[str] = None,
template_name: Optional[str] = None,
) -> None:
if build_config.image_type == ImageType.docker.value:
if build_config.image_type == ImageType.container.value:
if template_name:
image_name = f"distribution-{template_name}"
else:

View file

@@ -47,8 +47,8 @@ class StackBuild(Subcommand):
self.parser.add_argument(
"--image-type",
type=str,
help="Image Type to use for the build. This can be either conda or docker. If not specified, will use the image type from the template config.",
choices=["conda", "docker", "venv"],
help="Image Type to use for the build. This can be either conda or container or venv. If not specified, will use the image type from the template config.",
choices=["conda", "container", "venv"],
default="conda",
)

View file

@@ -27,7 +27,7 @@ class StackConfigure(Subcommand):
self.parser.add_argument(
"config",
type=str,
help="Path to the build config file (e.g. ~/.llama/builds/<image_type>/<name>-build.yaml). For docker, this could also be the name of the docker image. ",
help="Path to the build config file (e.g. ~/.llama/builds/<image_type>/<name>-build.yaml). For container, this could also be the name of the container image. ",
)
self.parser.add_argument(

View file

@@ -92,9 +92,9 @@ class StackRun(Subcommand):
)
if not config_file.exists() and not has_yaml_suffix:
# check if it's a build config saved to docker dir
# check if it's a build config saved to container dir
config_file = Path(
BUILDS_BASE_DIR / ImageType.docker.value / f"{args.config}-run.yaml"
BUILDS_BASE_DIR / ImageType.container.value / f"{args.config}-run.yaml"
)
if not config_file.exists() and not has_yaml_suffix:
@@ -115,12 +115,12 @@ class StackRun(Subcommand):
config_dict = yaml.safe_load(config_file.read_text())
config = parse_and_maybe_upgrade_config(config_dict)
if config.docker_image:
if config.container_image:
script = (
importlib.resources.files("llama_stack")
/ "distribution/start_container.sh"
)
run_args = [script, config.docker_image]
run_args = [script, config.container_image]
else:
current_conda_env = os.environ.get("CONDA_DEFAULT_ENV")
image_name = args.image_name or current_conda_env

View file

@@ -38,7 +38,7 @@ SERVER_DEPENDENCIES = [
class ImageType(Enum):
docker = "docker"
container = "container"
conda = "conda"
venv = "venv"
@@ -77,8 +77,8 @@ def get_provider_dependencies(
provider_spec = providers_for_api[provider_type]
deps.extend(provider_spec.pip_packages)
if provider_spec.docker_image:
raise ValueError("A stack's dependencies cannot have a docker image")
if provider_spec.container_image:
raise ValueError("A stack's dependencies cannot have a container image")
normal_deps = []
special_deps = []
@@ -109,23 +109,25 @@ def build_image(
image_name: str,
template_name: Optional[str] = None,
):
docker_image = build_config.distribution_spec.docker_image or "python:3.10-slim"
container_image = (
build_config.distribution_spec.container_image or "python:3.10-slim"
)
normal_deps, special_deps = get_provider_dependencies(
build_config.distribution_spec.providers
)
normal_deps += SERVER_DEPENDENCIES
if build_config.image_type == ImageType.docker.value:
if build_config.image_type == ImageType.container.value:
script = str(
importlib.resources.files("llama_stack") / "distribution/build_container.sh"
)
args = [
script,
image_name,
docker_image,
container_image,
str(build_file_path),
str(BUILDS_BASE_DIR / ImageType.docker.value),
str(BUILDS_BASE_DIR / ImageType.container.value),
" ".join(normal_deps),
]
elif build_config.image_type == ImageType.conda.value:
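
One practical consequence of the `docker` -> `container` rename is that old build configs naming `docker` as the image type no longer parse against the enum. A small standalone illustration, mirroring the enum above:

from enum import Enum

class ImageType(Enum):
    container = "container"
    conda = "conda"
    venv = "venv"

assert ImageType("container") is ImageType.container
try:
    ImageType("docker")  # configs still saying "docker" must be migrated
except ValueError:
    print("'docker' is no longer a valid image type")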

View file

@@ -13,7 +13,7 @@ PYPI_VERSION=${PYPI_VERSION:-}
BUILD_PLATFORM=${BUILD_PLATFORM:-}
if [ "$#" -lt 4 ]; then
echo "Usage: $0 <build_name> <docker_base> <pip_dependencies> [<special_pip_deps>]" >&2
echo "Usage: $0 <build_name> <container_base> <pip_dependencies> [<special_pip_deps>]" >&2
echo "Example: $0 my-fastapi-app python:3.9-slim 'fastapi uvicorn' " >&2
exit 1
fi
@@ -24,7 +24,7 @@ set -euo pipefail
build_name="$1"
image_name="distribution-$build_name"
docker_base=$2
container_base=$2
build_file_path=$3
host_build_dir=$4
pip_dependencies=$5
@@ -36,14 +36,14 @@ NC='\033[0m' # No Color
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
REPO_DIR=$(dirname $(dirname "$SCRIPT_DIR"))
DOCKER_BINARY=${DOCKER_BINARY:-docker}
DOCKER_OPTS=${DOCKER_OPTS:-}
CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
CONTAINER_OPTS=${CONTAINER_OPTS:-}
TEMP_DIR=$(mktemp -d)
add_to_docker() {
add_to_container() {
local input
output_file="$TEMP_DIR/Dockerfile"
output_file="$TEMP_DIR/Containerfile"
if [ -t 0 ]; then
printf '%s\n' "$1" >>"$output_file"
else
@@ -53,9 +53,9 @@ add_to_docker() {
}
# Update and install UBI9 components if UBI9 base image is used
if [[ $docker_base == *"registry.access.redhat.com/ubi9"* ]]; then
add_to_docker << EOF
FROM $docker_base
if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
add_to_container << EOF
FROM $container_base
WORKDIR /app
RUN microdnf -y update && microdnf install -y iputils net-tools wget \
@@ -64,8 +64,8 @@ RUN microdnf -y update && microdnf install -y iputils net-tools wget \
EOF
else
add_to_docker << EOF
FROM $docker_base
add_to_container << EOF
FROM $container_base
WORKDIR /app
RUN apt-get update && apt-get install -y \
@@ -82,7 +82,7 @@ fi
# Add pip dependencies first since llama-stack is what will change most often
# so we can reuse layers.
if [ -n "$pip_dependencies" ]; then
add_to_docker << EOF
add_to_container << EOF
RUN pip install --no-cache $pip_dependencies
EOF
fi
@@ -90,7 +90,7 @@ fi
if [ -n "$special_pip_deps" ]; then
IFS='#' read -ra parts <<<"$special_pip_deps"
for part in "${parts[@]}"; do
add_to_docker <<EOF
add_to_container <<EOF
RUN pip install --no-cache $part
EOF
done
@@ -108,16 +108,16 @@ if [ -n "$LLAMA_STACK_DIR" ]; then
# Install in editable format. We will mount the source code into the container
# so that changes will be reflected in the container without having to do a
# rebuild. This is just for development convenience.
add_to_docker << EOF
add_to_container << EOF
RUN pip install --no-cache -e $stack_mount
EOF
else
if [ -n "$TEST_PYPI_VERSION" ]; then
# these packages are damaged in test-pypi, so install them first
add_to_docker << EOF
add_to_container << EOF
RUN pip install fastapi libcst
EOF
add_to_docker << EOF
add_to_container << EOF
RUN pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \
llama-models==$TEST_PYPI_VERSION llama-stack-client==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION
@@ -128,7 +128,7 @@ EOF
else
SPEC_VERSION="llama-stack"
fi
add_to_docker << EOF
add_to_container << EOF
RUN pip install --no-cache $SPEC_VERSION
EOF
fi
@@ -140,14 +140,14 @@ if [ -n "$LLAMA_MODELS_DIR" ]; then
exit 1
fi
add_to_docker << EOF
add_to_container << EOF
RUN pip uninstall -y llama-models
RUN pip install --no-cache $models_mount
EOF
fi
add_to_docker << EOF
add_to_container << EOF
# This would be good in production but for debugging flexibility lets not add it right now
# We need a more solid production ready entrypoint.sh anyway
@@ -156,8 +156,8 @@ ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--templat
EOF
printf "Dockerfile created successfully in $TEMP_DIR/Dockerfile\n\n"
cat $TEMP_DIR/Dockerfile
printf "Containerfile created successfully in $TEMP_DIR/Containerfile\n\n"
cat $TEMP_DIR/Containerfile
printf "\n"
mounts=""
@@ -170,7 +170,7 @@ fi
if command -v selinuxenabled &>/dev/null && selinuxenabled; then
# Disable SELinux labels -- we don't want to relabel the llama-stack source dir
DOCKER_OPTS="$DOCKER_OPTS --security-opt label=disable"
CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
fi
# Set version tag based on PyPI version
@@ -200,7 +200,7 @@ else
fi
set -x
$DOCKER_BINARY build $DOCKER_OPTS $PLATFORM -t $image_tag -f "$TEMP_DIR/Dockerfile" "$REPO_DIR" $mounts
$CONTAINER_BINARY build $CONTAINER_OPTS $PLATFORM -t $image_tag -f "$TEMP_DIR/Containerfile" "$REPO_DIR" $mounts
# clean up tmp/configs
set +x

View file

@@ -6,8 +6,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
DOCKER_BINARY=${DOCKER_BINARY:-docker}
DOCKER_OPTS=${DOCKER_OPTS:-}
CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
CONTAINER_OPTS=${CONTAINER_OPTS:-}
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
set -euo pipefail
@@ -24,13 +24,13 @@ if [ $# -lt 2 ]; then
exit 1
fi
docker_image="$1"
container_image="$1"
host_build_dir="$2"
container_build_dir="/app/builds"
if command -v selinuxenabled &> /dev/null && selinuxenabled; then
# Disable SELinux labels
DOCKER_OPTS="$DOCKER_OPTS --security-opt label=disable"
CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
fi
mounts=""
@@ -39,9 +39,9 @@ if [ -n "$LLAMA_STACK_DIR" ]; then
fi
set -x
$DOCKER_BINARY run $DOCKER_OPTS -it \
$CONTAINER_BINARY run $CONTAINER_OPTS -it \
--entrypoint "/usr/local/bin/llama" \
-v $host_build_dir:$container_build_dir \
$mounts \
$docker_image \
$container_image \
stack configure ./llamastack-build.yaml --output-dir $container_build_dir

View file

@@ -73,7 +73,7 @@ class AutoRoutedProviderSpec(ProviderSpec):
provider_type: str = "router"
config_class: str = ""
docker_image: Optional[str] = None
container_image: Optional[str] = None
routing_table_api: Api
module: str
provider_data_validator: Optional[str] = Field(
@@ -89,7 +89,7 @@ class AutoRoutedProviderSpec(ProviderSpec):
class RoutingTableProviderSpec(ProviderSpec):
provider_type: str = "routing_table"
config_class: str = ""
docker_image: Optional[str] = None
container_image: Optional[str] = None
router_api: Api
module: str
@@ -101,7 +101,7 @@ class DistributionSpec(BaseModel):
default="",
description="Description of the distribution",
)
docker_image: Optional[str] = None
container_image: Optional[str] = None
providers: Dict[str, Union[str, List[str]]] = Field(
default_factory=dict,
description="""
@@ -127,9 +127,9 @@ Reference to the distribution this package refers to. For unregistered (adhoc) p
this could be just a hash
""",
)
docker_image: Optional[str] = Field(
container_image: Optional[str] = Field(
default=None,
description="Reference to the docker image if this package refers to a container",
description="Reference to the container image if this package refers to a container",
)
apis: List[str] = Field(
default_factory=list,
@@ -168,5 +168,5 @@ class BuildConfig(BaseModel):
)
image_type: str = Field(
default="conda",
description="Type of package to build (conda | docker | venv)",
description="Type of package to build (conda | container | venv)",
)

View file

@@ -145,7 +145,9 @@ async def resolve_impls(
log.warning(
f"Provider `{provider.provider_type}` for API `{api}` is deprecated and will be removed in a future release: {p.deprecation_warning}",
)
p.deps__ = [a.value for a in p.api_dependencies]
p.deps__ = [a.value for a in p.api_dependencies] + [
a.value for a in p.optional_api_dependencies
]
spec = ProviderWithSpec(
spec=p,
**(provider.model_dump()),
@@ -229,6 +231,9 @@ async def resolve_impls(
inner_impls_by_provider_id = {f"inner-{x.value}": {} for x in router_apis}
for api_str, provider in sorted_providers:
deps = {a: impls[a] for a in provider.spec.api_dependencies}
for a in provider.spec.optional_api_dependencies:
if a in impls:
deps[a] = impls[a]
inner_impls = {}
if isinstance(provider.spec, RoutingTableProviderSpec):
@@ -265,7 +270,7 @@ def topological_sort(
deps.append(dep)
for dep in deps:
if dep not in visited:
if dep not in visited and dep in providers_with_specs:
dfs((dep, providers_with_specs[dep]), visited, stack)
stack.append(api_str)
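
The two changes above work together: optional API dependencies take part in dependency ordering, but an implementation is injected only when one is actually configured, and the added `dep in providers_with_specs` guard keeps the DFS from recursing into a dependency no configured provider supplies. A stripped-down sketch of the injection step, with illustrative names:

# Required deps must resolve; optional deps are added only when available.
impls = {"inference": "inference_impl", "datasetio": "datasetio_impl"}

api_dependencies = ["inference"]
optional_api_dependencies = ["datasetio", "telemetry"]

deps = {a: impls[a] for a in api_dependencies}  # a KeyError here is a hard failure
for a in optional_api_dependencies:
    if a in impls:
        deps[a] = impls[a]  # "telemetry" has no impl, so it is skipped silently

print(deps)  # {'inference': 'inference_impl', 'datasetio': 'datasetio_impl'}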

View file

@@ -6,8 +6,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
DOCKER_BINARY=${DOCKER_BINARY:-docker}
DOCKER_OPTS=${DOCKER_OPTS:-}
CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
CONTAINER_OPTS=${CONTAINER_OPTS:-}
LLAMA_CHECKPOINT_DIR=${LLAMA_CHECKPOINT_DIR:-}
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
@@ -31,7 +31,7 @@ if [ $# -lt 3 ]; then
fi
build_name="$1"
docker_image="localhost/distribution-$build_name"
container_image="localhost/distribution-$build_name"
shift
yaml_config="$1"
@@ -64,7 +64,7 @@ set -x
if command -v selinuxenabled &> /dev/null && selinuxenabled; then
# Disable SELinux labels
DOCKER_OPTS="$DOCKER_OPTS --security-opt label=disable"
CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
fi
mounts=""
@@ -73,7 +73,7 @@ if [ -n "$LLAMA_STACK_DIR" ]; then
fi
if [ -n "$LLAMA_CHECKPOINT_DIR" ]; then
mounts="$mounts -v $LLAMA_CHECKPOINT_DIR:/root/.llama"
DOCKER_OPTS="$DOCKER_OPTS --gpus=all"
CONTAINER_OPTS="$CONTAINER_OPTS --gpus=all"
fi
version_tag="latest"
@@ -85,11 +85,11 @@ elif [ -n "$TEST_PYPI_VERSION" ]; then
version_tag="test-$TEST_PYPI_VERSION"
fi
$DOCKER_BINARY run $DOCKER_OPTS -it \
$CONTAINER_BINARY run $CONTAINER_OPTS -it \
-p $port:$port \
$env_vars \
-v "$yaml_config:/app/config.yaml" \
$mounts \
--env LLAMA_STACK_PORT=$port \
--entrypoint='["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]' \
$docker_image:$version_tag
$container_image:$version_tag

View file

@@ -14,6 +14,6 @@ def datasets():
datasets_info = {
d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()
}
if len(datasets_info) > 0:
selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
st.json(datasets_info[selected_dataset], expanded=True)

View file

@@ -16,6 +16,7 @@ def eval_tasks():
d.identifier: d.to_dict() for d in llama_stack_api.client.eval_tasks.list()
}
if len(eval_tasks_info) > 0:
selected_eval_task = st.selectbox(
"Select an eval task", list(eval_tasks_info.keys()), key="eval_task_inspect"
)

View file

@@ -10,11 +10,17 @@ from modules.api import llama_stack_api
def providers():
st.header("🔍 API Providers")
apis_providers_info = llama_stack_api.client.providers.list()
# selected_api = st.selectbox("Select an API", list(apis_providers_info.keys()))
for api in apis_providers_info.keys():
apis_providers_lst = llama_stack_api.client.providers.list()
api_to_providers = {}
for api_provider in apis_providers_lst:
if api_provider.api in api_to_providers:
api_to_providers[api_provider.api].append(api_provider)
else:
api_to_providers[api_provider.api] = [api_provider]
for api in api_to_providers.keys():
st.markdown(f"###### {api}")
st.dataframe([p.to_dict() for p in apis_providers_info[api]], width=500)
st.dataframe([x.to_dict() for x in api_to_providers[api]], width=500)
providers()
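
The manual grouping above could equally be written with `collections.defaultdict`; a small self-contained equivalent, where the `SimpleNamespace` objects stand in for whatever `client.providers.list()` returns:

from collections import defaultdict
from types import SimpleNamespace

providers_list = [
    SimpleNamespace(api="inference", provider_id="vllm"),
    SimpleNamespace(api="inference", provider_id="ollama"),
    SimpleNamespace(api="memory", provider_id="faiss"),
]

api_to_providers = defaultdict(list)
for p in providers_list:
    api_to_providers[p.api].append(p)

print({api: [p.provider_id for p in ps] for api, ps in api_to_providers.items()})
# {'inference': ['vllm', 'ollama'], 'memory': ['faiss']}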

View file

@@ -121,7 +121,7 @@ if prompt := st.chat_input("Example: What is Llama Stack?"):
if stream:
for chunk in response:
if chunk.event.event_type == "progress":
full_response += chunk.event.delta
full_response += chunk.event.delta.text
message_placeholder.markdown(full_response + "")
message_placeholder.markdown(full_response)
else:

View file

@@ -44,14 +44,21 @@ def rag_chat_page():
]
providers = llama_stack_api.client.providers.list()
memory_provider = None
for x in providers:
if x.api == "memory":
memory_provider = x.provider_id
llama_stack_api.client.memory_banks.register(
memory_bank_id=memory_bank_name, # Use the user-provided name
params={
"memory_bank_type": "vector",
"embedding_model": "all-MiniLM-L6-v2",
"chunk_size_in_tokens": 512,
"overlap_size_in_tokens": 64,
},
provider_id=providers["memory"][0].provider_id,
provider_id=memory_provider,
)
# insert documents using the custom bank name
@@ -69,9 +76,6 @@ def rag_chat_page():
"Select Memory Banks",
memory_banks,
)
memory_bank_configs = [
{"bank_id": bank_id, "type": "vector"} for bank_id in selected_memory_banks
]
available_models = llama_stack_api.client.models.list()
available_models = [
@@ -133,14 +137,13 @@ def rag_chat_page():
sampling_params={
"strategy": strategy,
},
tools=[
{
"type": "memory",
"memory_bank_configs": memory_bank_configs,
"query_generator_config": {"type": "default", "sep": " "},
"max_tokens_in_context": 4096,
"max_chunks": 10,
}
toolgroups=[
dict(
name="builtin::memory",
args={
"memory_bank_ids": [bank_id for bank_id in selected_memory_banks],
},
)
],
tool_choice="auto",
tool_prompt_format="json",
@@ -179,7 +182,7 @@ def rag_chat_page():
retrieval_response = ""
for log in EventLogger().log(response):
log.print()
if log.role == "memory_retrieval":
if log.role == "tool_execution":
retrieval_response += log.content.replace("====", "").strip()
retrieval_message_placeholder.info(retrieval_response)
else:
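
Pulling the pieces of this page together, the new toolgroups-based agent wiring looks roughly like the sketch below; the model id, instructions, and bank id are placeholders, and other agent-config fields are omitted:

agent_config = dict(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    instructions="Answer using the retrieved context.",  # placeholder
    toolgroups=[
        dict(
            name="builtin::memory",
            args={"memory_bank_ids": ["my_docs_bank"]},  # placeholder bank id
        )
    ],
    tool_choice="auto",
    tool_prompt_format="json",
)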

View file

@@ -96,6 +96,9 @@ class ProviderSpec(BaseModel):
default_factory=list,
description="Higher-level API surfaces may depend on other providers to provide their functionality",
)
optional_api_dependencies: List[Api] = Field(
default_factory=list,
)
deprecation_warning: Optional[str] = Field(
default=None,
description="If this provider is deprecated, specify the warning message here",
@@ -147,11 +150,11 @@ class InlineProviderSpec(ProviderSpec):
default_factory=list,
description="The pip dependencies needed for this implementation",
)
docker_image: Optional[str] = Field(
container_image: Optional[str] = Field(
default=None,
description="""
The docker image to use for this implementation. If one is provided, pip_packages will be ignored.
If a provider depends on other providers, the dependencies MUST NOT specify a docker image.
The container image to use for this implementation. If one is provided, pip_packages will be ignored.
If a provider depends on other providers, the dependencies MUST NOT specify a container image.
""",
)
module: str = Field(
@@ -194,7 +197,7 @@ API responses, specify the adapter here.
)
@property
def docker_image(self) -> Optional[str]:
def container_image(self) -> Optional[str]:
return None
@property

View file

@@ -72,7 +72,7 @@ def is_tracing_enabled(tracer):
class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
def __init__(self, config: TelemetryConfig, deps: Dict[str, Any]) -> None:
self.config = config
self.datasetio_api = deps[Api.datasetio]
self.datasetio_api = deps.get(Api.datasetio)
resource = Resource.create(
{

View file

@@ -24,7 +24,7 @@ def available_providers() -> List[ProviderSpec]:
"opentelemetry-sdk",
"opentelemetry-exporter-otlp-proto-http",
],
api_dependencies=[Api.datasetio],
optional_api_dependencies=[Api.datasetio],
module="llama_stack.providers.inline.telemetry.meta_reference",
config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
),

View file

@@ -176,7 +176,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
media_present = request_has_media(request)
if isinstance(request, ChatCompletionRequest):
if media_present:
# vllm does not seem to work well with image urls, so we download the images
input_dict["messages"] = [
await convert_message_to_openai_dict(m, download=True)
for m in request.messages

View file

@@ -6,15 +6,15 @@
from typing import AsyncGenerator, Dict, List, Optional
from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.datatypes import (
from llama_models.datatypes import (
GreedySamplingStrategy,
SamplingParams,
StopReason,
TopKSamplingStrategy,
TopPSamplingStrategy,
)
from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.datatypes import StopReason
from pydantic import BaseModel
from llama_stack.apis.common.content_types import (

View file

@@ -188,7 +188,7 @@ async def localize_image_content(media: ImageContentItem) -> Tuple[bytes, str]:
async def convert_image_content_to_url(
media: ImageContentItem, download: bool = False, include_format: bool = True
) -> str:
if media.url and not download:
if media.url and (not download or media.url.uri.startswith("data")):
return media.url.uri
content, format = await localize_image_content(media)
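
The added `startswith("data")` check exists because a `data:` URI already embeds the image bytes, so there is nothing to download even when `download=True`. A tiny standalone illustration of the predicate:

def keep_uri_as_is(uri: str, download: bool) -> bool:
    # Mirrors the condition above: return the URI untouched unless a download
    # is requested, and always keep data: URIs since they carry the payload inline.
    return (not download) or uri.startswith("data")

assert keep_uri_as_is("https://example.com/dog.png", download=False)
assert not keep_uri_as_is("https://example.com/dog.png", download=True)
assert keep_uri_as_is("data:image;base64,iVBORw0KGgo=", download=True)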

View file

@@ -22,6 +22,9 @@ class TelemetryDatasetMixin:
dataset_id: str,
max_depth: Optional[int] = None,
) -> None:
if self.datasetio_api is None:
raise RuntimeError("DatasetIO API not available")
spans = await self.query_spans(
attribute_filters=attribute_filters,
attributes_to_return=attributes_to_save,
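
Together with the `deps.get(Api.datasetio)` change above, this is the usual optional-dependency guard: hold a possibly-None handle and fail with a clear message only when the feature is actually used. A stripped-down sketch:

class TelemetryWithOptionalDatasets:
    def __init__(self, datasetio_api=None):
        # None when no datasetio provider is configured (the deps.get(...) above)
        self.datasetio_api = datasetio_api

    def save_spans_to_dataset(self):
        if self.datasetio_api is None:
            raise RuntimeError("DatasetIO API not available")
        # ... proceed using self.datasetio_api ...

try:
    TelemetryWithOptionalDatasets().save_spans_to_dataset()
except RuntimeError as err:
    print(err)  # DatasetIO API not available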

View file

@@ -30,6 +30,7 @@ def get_distribution_template() -> DistributionTemplate:
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
"remote::model-context-protocol",
],
}
name = "bedrock"
@@ -70,7 +71,7 @@ def get_distribution_template() -> DistributionTemplate:
name=name,
distro_type="self_hosted",
description="Use AWS Bedrock for running LLM inference and safety",
docker_image=None,
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
providers=providers,
default_models=default_models,

View file

@@ -28,4 +28,5 @@ distribution_spec:
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
- remote::model-context-protocol
image_type: conda

View file

@@ -81,6 +81,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db

View file

@@ -92,7 +92,7 @@ def get_distribution_template() -> DistributionTemplate:
name="cerebras",
distro_type="self_hosted",
description="Use Cerebras for running LLM inference",
docker_image=None,
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
providers=providers,
default_models=default_models,

View file

@@ -2,7 +2,7 @@ version: '2'
name: experimental-post-training
distribution_spec:
description: Experimental template for post training
docker_image: null
container_image: null
providers:
inference:
- inline::meta-reference

View file

@@ -1,6 +1,6 @@
version: '2'
image_name: experimental-post-training
docker_image: null
container_image: null
conda_env: experimental-post-training
apis:
- agents

View file

@@ -28,4 +28,5 @@ distribution_spec:
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
- remote::model-context-protocol
image_type: conda

View file

@@ -39,6 +39,7 @@ def get_distribution_template() -> DistributionTemplate:
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
"remote::model-context-protocol",
],
}
@@ -98,7 +99,7 @@ def get_distribution_template() -> DistributionTemplate:
name=name,
distro_type="self_hosted",
description="Use Fireworks.AI for running LLM inference",
docker_image=None,
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
providers=providers,
default_models=default_models,

View file

@@ -92,6 +92,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db

View file

@@ -86,6 +86,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db

View file

@@ -28,4 +28,5 @@ distribution_spec:
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
- remote::model-context-protocol
image_type: conda

View file

@@ -34,6 +34,7 @@ def get_distribution_template() -> DistributionTemplate:
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
"remote::model-context-protocol",
],
}
name = "hf-endpoint"
@@ -88,7 +89,7 @@ def get_distribution_template() -> DistributionTemplate:
name=name,
distro_type="self_hosted",
description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
docker_image=None,
container_image=None,
template_path=None,
providers=providers,
default_models=[inference_model, safety_model],

View file

@@ -91,6 +91,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db

View file

@@ -86,6 +86,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db

View file

@@ -28,4 +28,5 @@ distribution_spec:
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
- remote::model-context-protocol
image_type: conda

View file

@@ -34,6 +34,7 @@ def get_distribution_template() -> DistributionTemplate:
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
"remote::model-context-protocol",
],
}
@@ -89,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
name=name,
distro_type="self_hosted",
description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
docker_image=None,
container_image=None,
template_path=None,
providers=providers,
default_models=[inference_model, safety_model],

View file

@@ -91,6 +91,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db

View file

@@ -86,6 +86,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db

View file

@@ -28,4 +28,5 @@ distribution_spec:
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
- remote::model-context-protocol
image_type: conda

View file

@@ -38,6 +38,7 @@ def get_distribution_template() -> DistributionTemplate:
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
"remote::model-context-protocol",
],
}
name = "meta-reference-gpu"

View file

@@ -93,6 +93,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db

View file

@@ -87,6 +87,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db

View file

@@ -28,4 +28,5 @@ distribution_spec:
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
- remote::model-context-protocol
image_type: conda

View file

@@ -33,6 +33,7 @@ def get_distribution_template() -> DistributionTemplate:
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
"remote::model-context-protocol",
],
}
default_tool_groups = [

View file

@@ -89,6 +89,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/registry.db

View file

@@ -26,4 +26,5 @@ distribution_spec:
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
- remote::model-context-protocol
image_type: conda

View file

@@ -29,6 +29,7 @@ def get_distribution_template() -> DistributionTemplate:
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
"remote::model-context-protocol",
],
}
@@ -68,7 +69,7 @@ def get_distribution_template() -> DistributionTemplate:
name="nvidia",
distro_type="remote_hosted",
description="Use NVIDIA NIM for running LLM inference",
docker_image=None,
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
providers=providers,
default_models=default_models,

View file

@@ -83,6 +83,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db

View file

@@ -90,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
name=name,
distro_type="self_hosted",
description="Use (an external) Ollama server for running LLM inference",
docker_image=None,
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
providers=providers,
default_models=[inference_model, safety_model],

View file

@@ -12,6 +12,15 @@ distribution_spec:
- inline::llama-guard
agents:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
telemetry:
- inline::meta-reference
tool_runtime:
@ -19,4 +28,5 @@ distribution_spec:
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
- remote::model-context-protocol
image_type: conda

View file

@@ -2,9 +2,12 @@ version: '2'
image_name: remote-vllm
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
- tool_runtime
providers:
@@ -44,6 +47,28 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
@@ -68,6 +93,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db

View file

@@ -2,9 +2,12 @@ version: '2'
image_name: remote-vllm
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
- tool_runtime
providers:
@@ -38,6 +41,28 @@ providers:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
@@ -62,6 +87,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db

View file

@@ -27,12 +27,16 @@ def get_distribution_template() -> DistributionTemplate:
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"telemetry": ["inline::meta-reference"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
"remote::model-context-protocol",
],
}
name = "remote-vllm"

View file

@@ -37,7 +37,7 @@ class RunConfigSettings(BaseModel):
self,
name: str,
providers: Dict[str, List[str]],
docker_image: Optional[str] = None,
container_image: Optional[str] = None,
) -> StackRunConfig:
provider_registry = get_provider_registry()
@@ -83,7 +83,7 @@
return StackRunConfig(
image_name=name,
docker_image=docker_image,
container_image=container_image,
apis=apis,
providers=provider_configs,
metadata_store=SqliteKVStoreConfig.sample_run_config(
@@ -112,7 +112,7 @@ class DistributionTemplate(BaseModel):
# Optional configuration
run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None
docker_image: Optional[str] = None
container_image: Optional[str] = None
default_models: Optional[List[ModelInput]] = None
@@ -121,7 +121,7 @@
name=self.name,
distribution_spec=DistributionSpec(
description=self.description,
docker_image=self.docker_image,
container_image=self.container_image,
providers=self.providers,
),
image_type="conda", # default to conda, can be overridden
@@ -169,7 +169,7 @@
for yaml_pth, settings in self.run_configs.items():
run_config = settings.run_config(
self.name, self.providers, self.docker_image
self.name, self.providers, self.container_image
)
with open(yaml_output_dir / yaml_pth, "w") as f:
yaml.safe_dump(

View file

@@ -28,4 +28,5 @@ distribution_spec:
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
- remote::model-context-protocol
image_type: conda

View file

@@ -86,6 +86,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db

View file

@@ -85,6 +85,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db

View file

@@ -36,6 +36,7 @@ def get_distribution_template() -> DistributionTemplate:
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
"remote::model-context-protocol",
],
}
name = "tgi"
@@ -92,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
name=name,
distro_type="self_hosted",
description="Use (an external) TGI server for running LLM inference",
docker_image=None,
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
providers=providers,
default_models=[inference_model, safety_model],

View file

@@ -28,4 +28,5 @@ distribution_spec:
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
- remote::model-context-protocol
image_type: conda

View file

@@ -92,6 +92,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db

View file

@@ -86,6 +86,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db

View file

@@ -39,6 +39,7 @@ def get_distribution_template() -> DistributionTemplate:
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
"remote::model-context-protocol",
],
}
name = "together"
@@ -96,7 +97,7 @@ def get_distribution_template() -> DistributionTemplate:
name=name,
distro_type="self_hosted",
description="Use Together.AI for running LLM inference",
docker_image=None,
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
providers=providers,
default_models=default_models,

View file

@@ -28,4 +28,5 @@ distribution_spec:
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
- remote::model-context-protocol
image_type: conda

View file

@@ -89,6 +89,9 @@ providers:
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db

View file

@@ -33,6 +33,7 @@ def get_distribution_template() -> DistributionTemplate:
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
"remote::model-context-protocol",
],
}
@@ -84,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate:
name=name,
distro_type="self_hosted",
description="Use a built-in vLLM engine for running LLM inference",
docker_image=None,
container_image=None,
template_path=None,
providers=providers,
default_models=[inference_model],

View file

@@ -182,6 +182,7 @@ def test_builtin_tool_web_search(llama_stack_client, agent_config):
assert "tool_execution>" in logs_str
assert "Tool:brave_search Response:" in logs_str
assert "mark zuckerberg" in logs_str.lower()
if len(agent_config["output_shields"]) > 0:
assert "No Violation" in logs_str

Binary file not shown (new image added, 415 KiB).

View file

@@ -4,6 +4,9 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
import os
import pytest
from pydantic import BaseModel
@@ -69,6 +72,16 @@ def get_weather_tool_definition():
}
@pytest.fixture
def base64_image_url():
image_path = os.path.join(os.path.dirname(__file__), "dog.png")
with open(image_path, "rb") as image_file:
# Convert the image to base64
base64_string = base64.b64encode(image_file.read()).decode("utf-8")
base64_url = f"data:image;base64,{base64_string}"
return base64_url
def test_completion_non_streaming(llama_stack_client, text_model_id):
response = llama_stack_client.inference.completion(
content="Complete the sentence using one word: Roses are red, violets are ",
@@ -356,3 +369,31 @@ def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
streamed_content += chunk.event.delta.text.lower()
assert len(streamed_content) > 0
assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
def test_image_chat_completion_base64_url(
llama_stack_client, vision_model_id, base64_image_url
):
message = {
"role": "user",
"content": [
{
"type": "image",
"url": {
"uri": base64_image_url,
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0