fix eval notebook & add test to workflow (#803)

This commit is contained in:
Xi Yan 2025-01-16 23:11:21 -08:00 committed by GitHub
parent 9d574f4aee
commit c2a072911d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 277 additions and 276 deletions

View file

@ -239,5 +239,6 @@ jobs:
pip install pytest nbval
llama stack build --template together --image-type venv
pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Building_AI_Applications.ipynb
pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
# TODO: add trigger for integration test workflow & docker builds

View file

@ -105,6 +105,7 @@
}
],
"source": [
"# NBVAL_SKIP\n",
"!pip install -U llama-stack"
]
},
@ -309,12 +310,13 @@
}
],
"source": [
"# NBVAL_SKIP\n",
"!llama stack build --template together --image-type venv"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@ -328,7 +330,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: `bwrap` is not available. Code interpreter tool will not work correctly.\n"
"Not in Google Colab environment\n",
"\u001b[33mWarning: `bwrap` is not available. Code interpreter tool will not work correctly.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/anaconda3/envs/master/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
@ -356,63 +367,83 @@
"- safety\n",
"- scoring\n",
"- telemetry\n",
"conda_env: together\n",
"- tool_runtime\n",
"datasets: <span style=\"font-weight: bold\">[]</span>\n",
"docker_image: null\n",
"eval_tasks: <span style=\"font-weight: bold\">[]</span>\n",
"image_name: together\n",
"memory_banks: <span style=\"font-weight: bold\">[]</span>\n",
"metadata_store:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/root/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">registry.db</span>\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">registry.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
"models:\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8B-Instruct\n",
" model_type: &amp;id001 !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: null\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8B-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70B-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70B-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405B-Instruct-FP8\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405B-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3B-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3B-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11B-Vision-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11B-Vision-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90B-Vision-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90B-Vision-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.3</span>-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.3</span>-70B-Instruct-Turbo\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-8B\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-8B\n",
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
" model_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-11B-Vision\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-11B-Vision-Turbo\n",
"- metadata:\n",
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span>\n",
" model_id: all-MiniLM-L6-v2\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: sentence-transformers\n",
" provider_model_id: null\n",
"providers:\n",
" agents:\n",
" - config:\n",
" persistence_store:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/root/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">agents_store.db</span>\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">agents_store.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: meta-reference\n",
@ -430,14 +461,17 @@
" provider_type: inline::meta-reference\n",
" inference:\n",
" - config:\n",
" api_key: 4985b03e627419b2964d34b8519ac6c4319f094d1ffb4f45514b4eb87e5427a2\n",
" api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
" url: <span style=\"color: #0000ff; text-decoration-color: #0000ff; text-decoration: underline\">https://api.together.xyz/v1</span>\n",
" provider_id: together\n",
" provider_type: remote::together\n",
" - config: <span style=\"font-weight: bold\">{}</span>\n",
" provider_id: sentence-transformers\n",
" provider_type: inline::sentence-transformers\n",
" memory:\n",
" - config:\n",
" kvstore:\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/root/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">faiss_store.db</span>\n",
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">faiss_store.db</span>\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: faiss\n",
@ -454,22 +488,52 @@
" provider_id: llm-as-judge\n",
" provider_type: inline::llm-as-judge\n",
" - config:\n",
" openai_api_key: <span style=\"color: #008000; text-decoration-color: #008000\">''</span>\n",
" openai_api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
" provider_id: braintrust\n",
" provider_type: inlin<span style=\"color: #00ff00; text-decoration-color: #00ff00; font-weight: bold\">e::b</span>raintrust\n",
" telemetry:\n",
" - config:\n",
" service_name: llama-stack\n",
" sinks: sqlite\n",
" sqlite_db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/root/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">trace_store.db</span>\n",
" sqlite_db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">trace_store.db</span>\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" tool_runtime:\n",
" - config:\n",
" api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
" max_results: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>\n",
" provider_id: brave-search\n",
" provider_type: remot<span style=\"color: #00ff00; text-decoration-color: #00ff00; font-weight: bold\">e::b</span>rave-search\n",
" - config:\n",
" api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
" max_results: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>\n",
" provider_id: tavily-search\n",
" provider_type: remote::tavily-search\n",
" - config: <span style=\"font-weight: bold\">{}</span>\n",
" provider_id: code-interpreter\n",
" provider_type: inlin<span style=\"color: #00ff00; text-decoration-color: #00ff00; font-weight: bold\">e::c</span>ode-interpreter\n",
" - config: <span style=\"font-weight: bold\">{}</span>\n",
" provider_id: memory-runtime\n",
" provider_type: inline::memory-runtime\n",
"scoring_fns: <span style=\"font-weight: bold\">[]</span>\n",
"shields:\n",
"- params: null\n",
" provider_id: null\n",
" provider_shield_id: null\n",
" shield_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-8B\n",
"tool_groups:\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: tavily-search\n",
" toolgroup_id: builtin::websearch\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: memory-runtime\n",
" toolgroup_id: builtin::memory\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: code-interpreter\n",
" toolgroup_id: builtin::code_interpreter\n",
"version: <span style=\"color: #008000; text-decoration-color: #008000\">'2'</span>\n",
"\n",
"</pre>\n"
@ -484,63 +548,83 @@
"- safety\n",
"- scoring\n",
"- telemetry\n",
"conda_env: together\n",
"- tool_runtime\n",
"datasets: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"docker_image: null\n",
"eval_tasks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"image_name: together\n",
"memory_banks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"metadata_store:\n",
" db_path: \u001b[35m/root/.llama/distributions/together/\u001b[0m\u001b[95mregistry.db\u001b[0m\n",
" db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mregistry.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
"models:\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct\n",
" model_type: &id001 !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: null\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-FP8\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct-Turbo\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Meta-Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision\n",
" model_type: *id001\n",
" provider_id: null\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - llm\n",
" provider_id: together\n",
" provider_model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision-Turbo\n",
"- metadata:\n",
" embedding_dimension: \u001b[1;36m384\u001b[0m\n",
" model_id: all-MiniLM-L6-v2\n",
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
" - embedding\n",
" provider_id: sentence-transformers\n",
" provider_model_id: null\n",
"providers:\n",
" agents:\n",
" - config:\n",
" persistence_store:\n",
" db_path: \u001b[35m/root/.llama/distributions/together/\u001b[0m\u001b[95magents_store.db\u001b[0m\n",
" db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95magents_store.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: meta-reference\n",
@ -558,14 +642,17 @@
" provider_type: inline::meta-reference\n",
" inference:\n",
" - config:\n",
" api_key: 4985b03e627419b2964d34b8519ac6c4319f094d1ffb4f45514b4eb87e5427a2\n",
" api_key: \u001b[32m'********'\u001b[0m\n",
" url: \u001b[4;94mhttps://api.together.xyz/v1\u001b[0m\n",
" provider_id: together\n",
" provider_type: remote::together\n",
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" provider_id: sentence-transformers\n",
" provider_type: inline::sentence-transformers\n",
" memory:\n",
" - config:\n",
" kvstore:\n",
" db_path: \u001b[35m/root/.llama/distributions/together/\u001b[0m\u001b[95mfaiss_store.db\u001b[0m\n",
" db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mfaiss_store.db\u001b[0m\n",
" namespace: null\n",
" type: sqlite\n",
" provider_id: faiss\n",
@ -582,58 +669,74 @@
" provider_id: llm-as-judge\n",
" provider_type: inline::llm-as-judge\n",
" - config:\n",
" openai_api_key: \u001b[32m''\u001b[0m\n",
" openai_api_key: \u001b[32m'********'\u001b[0m\n",
" provider_id: braintrust\n",
" provider_type: inlin\u001b[1;92me::b\u001b[0mraintrust\n",
" telemetry:\n",
" - config:\n",
" service_name: llama-stack\n",
" sinks: sqlite\n",
" sqlite_db_path: \u001b[35m/root/.llama/distributions/together/\u001b[0m\u001b[95mtrace_store.db\u001b[0m\n",
" sqlite_db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mtrace_store.db\u001b[0m\n",
" provider_id: meta-reference\n",
" provider_type: inline::meta-reference\n",
" tool_runtime:\n",
" - config:\n",
" api_key: \u001b[32m'********'\u001b[0m\n",
" max_results: \u001b[1;36m3\u001b[0m\n",
" provider_id: brave-search\n",
" provider_type: remot\u001b[1;92me::b\u001b[0mrave-search\n",
" - config:\n",
" api_key: \u001b[32m'********'\u001b[0m\n",
" max_results: \u001b[1;36m3\u001b[0m\n",
" provider_id: tavily-search\n",
" provider_type: remote::tavily-search\n",
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" provider_id: code-interpreter\n",
" provider_type: inlin\u001b[1;92me::c\u001b[0mode-interpreter\n",
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
" provider_id: memory-runtime\n",
" provider_type: inline::memory-runtime\n",
"scoring_fns: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
"shields:\n",
"- params: null\n",
" provider_id: null\n",
" provider_shield_id: null\n",
" shield_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
"tool_groups:\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: tavily-search\n",
" toolgroup_id: builtin::websearch\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: memory-runtime\n",
" toolgroup_id: builtin::memory\n",
"- args: null\n",
" mcp_endpoint: null\n",
" provider_id: code-interpreter\n",
" toolgroup_id: builtin::code_interpreter\n",
"version: \u001b[32m'2'\u001b[0m\n",
"\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"Model(identifier='meta-llama/Llama-3.1-405B-Instruct', metadata={}, provider_id='together', provider_resource_id='meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo', type='model', model_type='llm')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"\n",
"from google.colab import userdata\n",
"\n",
"os.environ[\"TOGETHER_API_KEY\"] = userdata.get(\"TOGETHER_API_KEY\")\n",
"try:\n",
" from google.colab import userdata\n",
" os.environ['TOGETHER_API_KEY'] = userdata.get('TOGETHER_API_KEY')\n",
" os.environ['TAVILY_SEARCH_API_KEY'] = userdata.get('TAVILY_SEARCH_API_KEY')\n",
"except ImportError:\n",
" print(\"Not in Google Colab environment\")\n",
"\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"together\")\n",
"_ = client.initialize()\n",
"\n",
"# register 405B as LLM Judge model\n",
"client.models.register(\n",
" model_id=\"meta-llama/Llama-3.1-405B-Instruct\",\n",
" provider_model_id=\"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\",\n",
" provider_id=\"together\",\n",
")\n"
"_ = client.initialize()"
]
},
{
@ -662,7 +765,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {
"id": "TC_IwIAQo4q-"
},
@ -670,12 +773,12 @@
"source": [
"name = \"llamastack/mmmu\"\n",
"subset = \"Agriculture\"\n",
"split = \"dev\"\n"
"split = \"dev\""
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@ -788,130 +891,13 @@
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "feb82e061ee44283b4a46be858ef4cd7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"README.md: 0%| | 0.00/36.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c788d4e9e1e24dca9b6503689df9b631",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"dev-00000-of-00001.parquet: 0%| | 0.00/29.5M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "44f585990aa244d8ba61f892dc1ccc1c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"validation-00000-of-00001.parquet: 0%| | 0.00/165M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4fed5720f30b4b3cbbc606a4f25e223b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"test-00000-of-00003.parquet: 0%| | 0.00/461M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9659140487ca4d3ea799196d2c1ecf61",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"test-00001-of-00003.parquet: 0%| | 0.00/454M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c06f9a090fb54c74b947634bf6d11fa8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"test-00002-of-00003.parquet: 0%| | 0.00/471M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4f788a7920c346f3b42900825bd6711a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating dev split: 0%| | 0/5 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9d2b6eabf7e14436b72bbf374b4a2a0a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating validation split: 0%| | 0/30 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "24e48376a72940679989a39a40bbe7f6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating test split: 0%| | 0/287 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
"name": "stderr",
"output_type": "stream",
"text": [
"Generating dev split: 100%|██████████| 5/5 [00:00<00:00, 139.81 examples/s]\n",
"Generating validation split: 100%|██████████| 30/30 [00:00<00:00, 258.29 examples/s]\n",
"Generating test split: 100%|██████████| 287/287 [00:01<00:00, 197.69 examples/s]\n"
]
}
],
"source": [
@ -936,7 +922,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@ -951,7 +937,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 5/5 [00:51<00:00, 10.28s/it]\n"
"100%|██████████| 5/5 [00:42<00:00, 8.60s/it]\n"
]
},
{
@ -959,25 +945,21 @@
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">EvaluateResponse</span><span style=\"font-weight: bold\">(</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">generations</span>=<span style=\"font-weight: bold\">[</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'Answer: D'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The Colorado potato beetle (Leptinotarsa decemlineata) is a significant pest of potatoes, causing damage to the leaves and stems of potato plants. The insect with black-colored antennae in the image is a Colorado potato beetle, which is known for its distinctive black and yellow stripes. On the other hand, the insect with tan-colored antennae is not a Colorado potato beetle and does not appear to be a pest of potatoes.\\n\\n*Answer*: B) The one with black coloured antennae'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The image shows a sunflower leaf with small, dark spots and white powdery patches. The dark spots are likely caused by a fungal pathogen, such as rust or septoria leaf spot, while the white powdery patches are likely caused by a fungal pathogen, such as powdery mildew.\\n\\nSince there are two distinct types of lesions on the leaf, it is likely that there are two different pathogens infecting the leaf.\\n\\n**Answer:** B) Two pathogens'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'To determine the count of pathogens infecting this sunflower leaf, we need to analyze the image carefully. The image shows a sunflower leaf with several brown spots and patches on its surface. These brown spots and patches are indicative of fungal infections, which are common pathogens that affect sunflowers.\\n\\nUpon closer inspection, we can see that there are two distinct types of brown spots and patches on the leaf. One type is smaller and more circular in shape, while the other type is larger and more irregular in shape. This suggests that there may be two different pathogens infecting the leaf.\\n\\nHowever, without further information or testing, it is difficult to say for certain whether these two types of brown spots and patches are caused by different pathogens or if they are just different stages of the same infection. Therefore, based on the available information, the most likely answer is:\\n\\nAnswer: B) Two pathogens'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"The question requires the identification of the reason behind the massive gum production on the trunks of grapefruit trees in Cyprus, despite appearing healthy from a distance. The correct answer can be deduced by analyzing the symptoms and considering the possible causes.\\n\\nTo determine the correct answer, let's evaluate each option:\\n\\nA) Don't know or not sure: This option is incorrect because it does not provide a specific reason for the gum production.\\n\\nB) Physiological stress: This option is also incorrect because it is too broad and does not specifically explain the gum production.\\n\\nC) Bacterial disease: This option is incorrect because bacterial diseases typically cause different symptoms such as leaf spots, blights, or wilting.\\n\\nD) Harvesting damage when cutting with knives: This option is incorrect because harvesting damage would likely cause wounds or scars on the tree, but it would not lead to massive gum production.\\n\\nE) Fungal gummosis: This option is the most likely cause of the gum production. Fungal gummosis is a common disease in citrus trees, including grapefruit, that causes the production of gum or sap on the trunks and branches. The disease is typically caused by fungi such as Phytophthora or Diplodia, which infect the tree through wounds or natural openings. The gum production is a defense mechanism by the tree to try to seal off the infection and prevent further damage.\\n\\nTherefore, the correct answer is:\\n\\nAnswer: E\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'Answer: D'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'Based on the image, the most likely reason for the massive gum production on the trunks of these grapefruit trees in Cyprus is a fungal infection. The gummosis, or the production of gum, is a common symptom of fungal diseases in citrus trees, and it can be caused by various factors such as root damage, water stress, or nutrient deficiencies. However, in this case, the presence of the gum on the trunks of the trees suggests that the cause is more likely related to a fungal infection.\\n\\nAnswer: E) Fungal gummosis'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The correct answer is D) Most viruses have a specific relationship with their vectors.\\n\\nExplanation:\\n\\n* Laboratory work with micro manipulators can mimic the transmission of viruses, but this is not the primary method of virus transmission in nature.\\n* Not all plant-feeding insects can transmit viruses; only specific species that have evolved to transmit particular viruses are capable of doing so.\\n* Similarly, not all plant viruses can be transmitted by insects; some are transmitted through other means such as mechanical transmission or nematodes.\\n* The correct assertion is that most viruses have a specific relationship with their vectors, meaning that each virus is typically transmitted by a specific type of insect or vector.\\n\\nAnswer: D'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"The petioles of this rhubarb are splitting, and we need to determine which of the listed issues would not be the cause. \\n\\nFirst, let's consider physiological problems (A). Rhubarb is a hardy plant, but it can still experience physiological issues due to factors like temperature fluctuations, water stress, or nutrient deficiencies. These issues could potentially cause the petioles to split.\\n\\nNext, let's look at phytoplasma infection (B). Phytoplasmas are bacteria-like organisms that can infect plants, causing a range of symptoms including yellowing or browning of leaves, stunted growth, and distorted or split petioles. So, phytoplasma infection could also be a possible cause.\\n\\nNow, let's consider animal damage (D). Animals like rabbits, deer, or rodents might feed on the rhubarb leaves, causing damage to the petioles and potentially leading to splitting.\\n\\nFinally, let's think about bacteria (E). Bacterial infections can cause a range of symptoms in plants, including soft rot, leaf spot, and petiole splitting. So, bacteria could also be a potential cause.\\n\\nBased on this analysis, it seems that all of the listed issues could potentially cause the petioles of this rhubarb to split. Therefore, the correct answer is:\\n\\nAnswer: C\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'**Causes of Splitting Petioles in Rhubarb**\\n\\nThe following factors can cause the petioles of rhubarb to split:\\n\\n* **Physiological Problems**: Issues such as water stress, nutrient deficiencies, or extreme temperatures can lead to splitting.\\n* **Phytoplasma Infection**: A bacterial infection caused by phytoplasma can lead to splitting of the petioles.\\n* **Animal Damage**: Pests like slugs, snails, or rodents can damage the plant and cause splitting.\\n* **Bacterial Infection**: Bacterial infections can also cause splitting.\\n\\nAs a result, the correct answer is:\\n\\n*Answer*: A) Physiological problems'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"font-weight: bold\">]</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">scores</span>=<span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'basic::regex_parser_multiple_choice_answer'</span>: <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">ScoringResult</span><span style=\"font-weight: bold\">(</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">aggregated_results</span>=<span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'accuracy'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.2</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'num_correct'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.0</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'num_total'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5.0</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">aggregated_results</span>=<span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'accuracy'</span>: <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'accuracy'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.2</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'num_correct'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.0</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'num_total'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span><span style=\"font-weight: bold\">}}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">score_rows</span>=<span style=\"font-weight: bold\">[{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.0</span><span style=\"font-weight: bold\">}</span>, <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.0</span><span style=\"font-weight: bold\">}</span>, <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.0</span><span style=\"font-weight: bold\">}</span>, <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1.0</span><span style=\"font-weight: bold\">}</span>, <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">0.0</span><span style=\"font-weight: bold\">}]</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">)</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"font-weight: bold\">}</span>\n",
@ -987,25 +969,21 @@
"text/plain": [
"\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'Answer: D'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The Colorado potato beetle \u001b[0m\u001b[32m(\u001b[0m\u001b[32mLeptinotarsa decemlineata\u001b[0m\u001b[32m)\u001b[0m\u001b[32m is a significant pest of potatoes, causing damage to the leaves and stems of potato plants. The insect with black-colored antennae in the image is a Colorado potato beetle, which is known for its distinctive black and yellow stripes. On the other hand, the insect with tan-colored antennae is not a Colorado potato beetle and does not appear to be a pest of potatoes.\\n\\n*Answer*: B\u001b[0m\u001b[32m)\u001b[0m\u001b[32m The one with black coloured antennae'\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The image shows a sunflower leaf with small, dark spots and white powdery patches. The dark spots are likely caused by a fungal pathogen, such as rust or septoria leaf spot, while the white powdery patches are likely caused by a fungal pathogen, such as powdery mildew.\\n\\nSince there are two distinct types of lesions on the leaf, it is likely that there are two different pathogens infecting the leaf.\\n\\n**Answer:** B\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Two pathogens'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'To determine the count of pathogens infecting this sunflower leaf, we need to analyze the image carefully. The image shows a sunflower leaf with several brown spots and patches on its surface. These brown spots and patches are indicative of fungal infections, which are common pathogens that affect sunflowers.\\n\\nUpon closer inspection, we can see that there are two distinct types of brown spots and patches on the leaf. One type is smaller and more circular in shape, while the other type is larger and more irregular in shape. This suggests that there may be two different pathogens infecting the leaf.\\n\\nHowever, without further information or testing, it is difficult to say for certain whether these two types of brown spots and patches are caused by different pathogens or if they are just different stages of the same infection. Therefore, based on the available information, the most likely answer is:\\n\\nAnswer: B\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Two pathogens'\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The question requires the identification of the reason behind the massive gum production on the trunks of grapefruit trees in Cyprus, despite appearing healthy from a distance. The correct answer can be deduced by analyzing the symptoms and considering the possible causes.\\n\\nTo determine the correct answer, let's evaluate each option:\\n\\nA\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Don't know or not sure: This option is incorrect because it does not provide a specific reason for the gum production.\\n\\nB\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Physiological stress: This option is also incorrect because it is too broad and does not specifically explain the gum production.\\n\\nC\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Bacterial disease: This option is incorrect because bacterial diseases typically cause different symptoms such as leaf spots, blights, or wilting.\\n\\nD\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Harvesting damage when cutting with knives: This option is incorrect because harvesting damage would likely cause wounds or scars on the tree, but it would not lead to massive gum production.\\n\\nE\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Fungal gummosis: This option is the most likely cause of the gum production. Fungal gummosis is a common disease in citrus trees, including grapefruit, that causes the production of gum or sap on the trunks and branches. The disease is typically caused by fungi such as Phytophthora or Diplodia, which infect the tree through wounds or natural openings. The gum production is a defense mechanism by the tree to try to seal off the infection and prevent further damage.\\n\\nTherefore, the correct answer is:\\n\\nAnswer: E\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'Answer: D'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'Based on the image, the most likely reason for the massive gum production on the trunks of these grapefruit trees in Cyprus is a fungal infection. The gummosis, or the production of gum, is a common symptom of fungal diseases in citrus trees, and it can be caused by various factors such as root damage, water stress, or nutrient deficiencies. However, in this case, the presence of the gum on the trunks of the trees suggests that the cause is more likely related to a fungal infection.\\n\\nAnswer: E\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Fungal gummosis'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The correct answer is D\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Most viruses have a specific relationship with their vectors.\\n\\nExplanation:\\n\\n* Laboratory work with micro manipulators can mimic the transmission of viruses, but this is not the primary method of virus transmission in nature.\\n* Not all plant-feeding insects can transmit viruses; only specific species that have evolved to transmit particular viruses are capable of doing so.\\n* Similarly, not all plant viruses can be transmitted by insects; some are transmitted through other means such as mechanical transmission or nematodes.\\n* The correct assertion is that most viruses have a specific relationship with their vectors, meaning that each virus is typically transmitted by a specific type of insect or vector.\\n\\nAnswer: D'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The petioles of this rhubarb are splitting, and we need to determine which of the listed issues would not be the cause. \\n\\nFirst, let's consider physiological problems \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Rhubarb is a hardy plant, but it can still experience physiological issues due to factors like temperature fluctuations, water stress, or nutrient deficiencies. These issues could potentially cause the petioles to split.\\n\\nNext, let's look at phytoplasma infection \u001b[0m\u001b[32m(\u001b[0m\u001b[32mB\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Phytoplasmas are bacteria-like organisms that can infect plants, causing a range of symptoms including yellowing or browning of leaves, stunted growth, and distorted or split petioles. So, phytoplasma infection could also be a possible cause.\\n\\nNow, let's consider animal damage \u001b[0m\u001b[32m(\u001b[0m\u001b[32mD\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Animals like rabbits, deer, or rodents might feed on the rhubarb leaves, causing damage to the petioles and potentially leading to splitting.\\n\\nFinally, let's think about bacteria \u001b[0m\u001b[32m(\u001b[0m\u001b[32mE\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Bacterial infections can cause a range of symptoms in plants, including soft rot, leaf spot, and petiole splitting. So, bacteria could also be a potential cause.\\n\\nBased on this analysis, it seems that all of the listed issues could potentially cause the petioles of this rhubarb to split. Therefore, the correct answer is:\\n\\nAnswer: C\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'**Causes of Splitting Petioles in Rhubarb**\\n\\nThe following factors can cause the petioles of rhubarb to split:\\n\\n* **Physiological Problems**: Issues such as water stress, nutrient deficiencies, or extreme temperatures can lead to splitting.\\n* **Phytoplasma Infection**: A bacterial infection caused by phytoplasma can lead to splitting of the petioles.\\n* **Animal Damage**: Pests like slugs, snails, or rodents can damage the plant and cause splitting.\\n* **Bacterial Infection**: Bacterial infections can also cause splitting.\\n\\nAs a result, the correct answer is:\\n\\n*Answer*: A\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Physiological problems'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n",
"\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[32m'basic::regex_parser_multiple_choice_answer'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'accuracy'\u001b[0m: \u001b[1;36m0.2\u001b[0m, \u001b[32m'num_correct'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'num_total'\u001b[0m: \u001b[1;36m5.0\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'accuracy'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'accuracy'\u001b[0m: \u001b[1;36m0.2\u001b[0m, \u001b[32m'num_correct'\u001b[0m: \u001b[1;36m1.0\u001b[0m, \u001b[32m'num_total'\u001b[0m: \u001b[1;36m5\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m1.0\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[1;36m0.0\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n",
@ -1056,7 +1034,9 @@
" \"model\": \"meta-llama/Llama-3.2-90B-Vision-Instruct\",\n",
" \"sampling_params\": {\n",
" \"strategy\": {\n",
" \"type\": \"greedy\",\n",
" \"type\": \"top_p\",\n",
" \"temperature\": 1.0,\n",
" \"top_p\": 0.95,\n",
" },\n",
" \"max_tokens\": 4096,\n",
" \"repeat_penalty\": 1.0,\n",
@ -1081,7 +1061,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {
"id": "HXmZf3Ymw-aX"
},
@ -1108,7 +1088,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {
"id": "Gc8azb4Rxr5J"
},
@ -1122,7 +1102,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@ -1136,7 +1116,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 5/5 [00:48<00:00, 9.68s/it]\n"
" 0%| | 0/5 [00:00<?, ?it/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 5/5 [00:31<00:00, 6.38s/it]\n"
]
},
{
@ -1144,29 +1131,25 @@
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">EvaluateResponse</span><span style=\"font-weight: bold\">(</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">generations</span>=<span style=\"font-weight: bold\">[</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The recipient of the IEEE Frank Rosenblatt Award in 2010 was Vladimir Vapnik'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I'm not sure who received the IEEE Frank Rosenblatt Award in 2010.\"</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I'm not aware of the information about the 2018 Jerlov Award recipient.\"</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I am unable to verify who was awarded the Oceanography Society's Jerlov Award in 2018.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"Radcliffe College was a women's liberal arts college in Cambridge, Massachusetts. However, it merged with Harvard University in 1977 and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'I do not have information on the Leipzig 1877 tournament.'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"Radcliffe College was a women's liberal arts college, but it has since been integrated into Harvard University.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"The Leipzig 1877 tournament was organized in the honor of 50th anniversary of the first chess club in Germany (the Leipzig Chess Club's) founding and of the 50th anniversary of Paul Morphy's birth\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"Karl Küchler's 1908 guidebook states that Empress Elizabeth of Austria's favorite sculpture, which was made for her villa Achilleion at Corfu, depicted 'Dying Achilles'.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I am unable to verify what Empress Elizabeth of Austria's favorite sculpture depicted at her villa Achilleion at Corfu, according to Karl Küchler.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"font-weight: bold\">]</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">scores</span>=<span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'llm-as-judge::405b-simpleqa'</span>: <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">ScoringResult</span><span style=\"font-weight: bold\">(</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">aggregated_results</span>=<span style=\"font-weight: bold\">{}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">score_rows</span>=<span style=\"font-weight: bold\">[</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'A'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'A'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span><span style=\"font-weight: bold\">}</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span><span style=\"font-weight: bold\">}</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"font-weight: bold\">]</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">)</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"font-weight: bold\">}</span>\n",
@ -1176,29 +1159,25 @@
"text/plain": [
"\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The recipient of the IEEE Frank Rosenblatt Award in 2010 was Vladimir Vapnik'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I'm not sure who received the IEEE Frank Rosenblatt Award in 2010.\"\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I'm not aware of the information about the 2018 Jerlov Award recipient.\"\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I am unable to verify who was awarded the Oceanography Society's Jerlov Award in 2018.\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Radcliffe College was a women's liberal arts college in Cambridge, Massachusetts. However, it merged with Harvard University in 1977 and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'I do not have information on the Leipzig 1877 tournament.'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Radcliffe College was a women's liberal arts college, but it has since been integrated into Harvard University.\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The Leipzig 1877 tournament was organized in the honor of 50th anniversary of the first chess club in Germany \u001b[0m\u001b[32m(\u001b[0m\u001b[32mthe Leipzig Chess Club's\u001b[0m\u001b[32m)\u001b[0m\u001b[32m founding and of the 50th anniversary of Paul Morphy's birth\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Karl Küchler's 1908 guidebook states that Empress Elizabeth of Austria's favorite sculpture, which was made for her villa Achilleion at Corfu, depicted 'Dying Achilles'.\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I am unable to verify what Empress Elizabeth of Austria's favorite sculpture depicted at her villa Achilleion at Corfu, according to Karl Küchler.\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n",
"\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[32m'llm-as-judge::405b-simpleqa'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'A'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'A'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n",
@ -1210,6 +1189,13 @@
}
],
"source": [
"# register 405B as LLM Judge model\n",
"client.models.register(\n",
" model_id=\"meta-llama/Llama-3.1-405B-Instruct\",\n",
" provider_model_id=\"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\",\n",
" provider_id=\"together\",\n",
")\n",
"\n",
"client.eval_tasks.register(\n",
" eval_task_id=\"meta-reference::simpleqa\",\n",
" dataset_id=simpleqa_dataset_id,\n",
@ -1257,7 +1243,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@ -1271,7 +1257,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"5it [00:26, 5.29s/it]\n"
"5it [00:06, 1.33s/it]\n"
]
},
{
@ -1280,27 +1266,25 @@
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">EvaluateResponse</span><span style=\"font-weight: bold\">(</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">generations</span>=<span style=\"font-weight: bold\">[</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I'm sorry but I cannot find the recipient of the IEEE Frank Rosenblatt Award in 2010.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The IEEE Frank Rosenblatt Award was given to Professor John Shawe-Taylor in 2010 for his contributions to the foundations of kernel methods.'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I'm not sure who was awarded the Oceanography Society's Jerlov Award in 2018. Let me search for the information.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The Jerlov Award is given by The Oceanography Society to recognize outstanding contributions to the field of ocean optics. The 2018 Jerlov Award was awarded to Dr. Kendall L. Carder.'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"The women's liberal arts college in Cambridge, Massachusetts is called Radcliffe College. However, in 1999, it merged with Harvard University and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"The women's liberal arts college in Cambridge, Massachusetts is Radcliffe College. However, in 1999, Radcliffe College merged with Harvard University to form the Radcliffe Institute for Advanced Study at Harvard University. The institute is still located in Cambridge, Massachusetts, and is dedicated to supporting women's education and research.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The Leipzig 1877 tournament was organized in honor of Adolf Anderssen.'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The 1877 Leipzig tournament was organized in honor of Anderssen, a German chess master.'</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"Empress Elizabeth of Austria's favorite sculpture, made for her villa Achilleion at Corfu, depicted Achilles.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"According to Karl Küchler, Empress Elizabeth of Austria's favorite sculpture, which was made for her villa Achilleion at Corfu, depicted the Dying Achilles.\"</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"font-weight: bold\">]</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">scores</span>=<span style=\"font-weight: bold\">{</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'llm-as-judge::405b-simpleqa'</span>: <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">ScoringResult</span><span style=\"font-weight: bold\">(</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">aggregated_results</span>=<span style=\"font-weight: bold\">{}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">score_rows</span>=<span style=\"font-weight: bold\">[</span>\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C.'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'A'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'A'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'A'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'A'</span><span style=\"font-weight: bold\">}</span>,\n",
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'B'</span><span style=\"font-weight: bold\">}</span>\n",
@ -1314,27 +1298,25 @@
"\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I'm sorry but I cannot find the recipient of the IEEE Frank Rosenblatt Award in 2010.\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The IEEE Frank Rosenblatt Award was given to Professor John Shawe-Taylor in 2010 for his contributions to the foundations of kernel methods.'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I'm not sure who was awarded the Oceanography Society's Jerlov Award in 2018. Let me search for the information.\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The Jerlov Award is given by The Oceanography Society to recognize outstanding contributions to the field of ocean optics. The 2018 Jerlov Award was awarded to Dr. Kendall L. Carder.'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The women's liberal arts college in Cambridge, Massachusetts is called Radcliffe College. However, in 1999, it merged with Harvard University and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The women's liberal arts college in Cambridge, Massachusetts is Radcliffe College. However, in 1999, Radcliffe College merged with Harvard University to form the Radcliffe Institute for Advanced Study at Harvard University. The institute is still located in Cambridge, Massachusetts, and is dedicated to supporting women's education and research.\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The Leipzig 1877 tournament was organized in honor of Adolf Anderssen.'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The 1877 Leipzig tournament was organized in honor of Anderssen, a German chess master.'\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Empress Elizabeth of Austria's favorite sculpture, made for her villa Achilleion at Corfu, depicted Achilles.\"\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"According to Karl Küchler, Empress Elizabeth of Austria's favorite sculpture, which was made for her villa Achilleion at Corfu, depicted the Dying Achilles.\"\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n",
"\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n",
"\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n",
"\u001b[2;32m│ │ \u001b[0m\u001b[32m'llm-as-judge::405b-simpleqa'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C.'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'A'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'A'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'A'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'A'\u001b[0m\u001b[1m}\u001b[0m,\n",
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'B'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'B'\u001b[0m\u001b[1m}\u001b[0m\n",
@ -1350,15 +1332,17 @@
],
"source": [
"agent_config = {\n",
" \"model\": \"meta-llama/Llama-3.1-405B-Instruct\",\n",
" \"instructions\": \"You are a helpful assistant\",\n",
" \"sampling_params\": {\"strategy\": {\"type\": \"greedy\"}},\n",
" \"tools\": [\n",
" {\n",
" \"type\": \"brave_search\",\n",
" \"engine\": \"tavily\",\n",
" \"api_key\": userdata.get(\"TAVILY_SEARCH_API_KEY\"),\n",
" \"model\": \"meta-llama/Llama-3.3-70B-Instruct\",\n",
" \"instructions\": \"You are a helpful assistant that have access to tool to search the web. \",\n",
" \"sampling_params\": {\n",
" \"strategy\": {\n",
" \"type\": \"top_p\",\n",
" \"temperature\": 0.5,\n",
" \"top_p\": 0.9,\n",
" }\n",
" },\n",
" \"toolgroups\": [\n",
" \"builtin::websearch\",\n",
" ],\n",
" \"tool_choice\": \"auto\",\n",
" \"tool_prompt_format\": \"json\",\n",
@ -1381,6 +1365,13 @@
")\n",
"pprint(response)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@ -1396,7 +1387,16 @@
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {