diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
index 8eecf84ab..f3f41b18a 100644
--- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
+++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
@@ -1017,14 +1017,14 @@
     "    \"content\": SYSTEM_PROMPT_TEMPLATE.format(subject=subset),\n",
     "}\n",
     "\n",
-    "client.eval_tasks.register(\n",
-    "    eval_task_id=\"meta-reference::mmmu\",\n",
+    "client.benchmarks.register(\n",
+    "    benchmark_id=\"meta-reference::mmmu\",\n",
     "    dataset_id=f\"mmmu-{subset}-{split}\",\n",
     "    scoring_functions=[\"basic::regex_parser_multiple_choice_answer\"],\n",
     ")\n",
     "\n",
-    "response = client.eval.evaluate_rows(\n",
-    "    task_id=\"meta-reference::mmmu\",\n",
+    "response = client.eval.evaluate_rows_alpha(\n",
+    "    benchmark_id=\"meta-reference::mmmu\",\n",
     "    input_rows=eval_rows,\n",
     "    scoring_functions=[\"basic::regex_parser_multiple_choice_answer\"],\n",
     "    task_config={\n",
@@ -1196,14 +1196,14 @@
     "    provider_id=\"together\",\n",
     ")\n",
     "\n",
-    "client.eval_tasks.register(\n",
-    "    eval_task_id=\"meta-reference::simpleqa\",\n",
+    "client.benchmarks.register(\n",
+    "    benchmark_id=\"meta-reference::simpleqa\",\n",
     "    dataset_id=simpleqa_dataset_id,\n",
     "    scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
     ")\n",
     "\n",
-    "response = client.eval.evaluate_rows(\n",
-    "    task_id=\"meta-reference::simpleqa\",\n",
+    "response = client.eval.evaluate_rows_alpha(\n",
+    "    benchmark_id=\"meta-reference::simpleqa\",\n",
     "    input_rows=eval_rows.rows,\n",
     "    scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
     "    task_config={\n",
@@ -1351,8 +1351,8 @@
     "    \"enable_session_persistence\": False,\n",
     "}\n",
     "\n",
-    "response = client.eval.evaluate_rows(\n",
-    "    task_id=\"meta-reference::simpleqa\",\n",
+    "response = client.eval.evaluate_rows_alpha(\n",
+    "    benchmark_id=\"meta-reference::simpleqa\",\n",
     "    input_rows=eval_rows.rows,\n",
     "    scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
     "    task_config={\n",
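For reference, the renamed calls used throughout this diff come together roughly as in the sketch below. This is a hypothetical, self-contained version of the notebook's flow, not the notebook itself: it assumes a Llama Stack server reachable at `http://localhost:8321` and a `llama-stack-client` version that exposes `benchmarks.register` and `eval.evaluate_rows_alpha`. The dataset ID, model ID, placeholder rows, and the inner shape of `task_config` are illustrative assumptions, not taken verbatim from the notebook.

```python
# Hypothetical migration sketch for the eval_tasks -> benchmarks rename.
# Assumes a running Llama Stack server and a client version that exposes
# benchmarks.register and eval.evaluate_rows_alpha (as in this diff).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed server URL

# Old: client.eval_tasks.register(eval_task_id=...)
client.benchmarks.register(
    benchmark_id="meta-reference::simpleqa",
    dataset_id="simpleqa",  # illustrative; the notebook registers its dataset earlier
    scoring_functions=["llm-as-judge::405b-simpleqa"],
)

# Placeholder rows standing in for the notebook's eval_rows / eval_rows.rows.
input_rows = [{"input_query": "What is 2 + 2?", "expected_answer": "4"}]

# Old: client.eval.evaluate_rows(task_id=...)
response = client.eval.evaluate_rows_alpha(
    benchmark_id="meta-reference::simpleqa",
    input_rows=input_rows,
    scoring_functions=["llm-as-judge::405b-simpleqa"],
    task_config={
        # The task_config shape below is an assumption; verify it against
        # the eval API of your installed client version.
        "type": "benchmark",
        "eval_candidate": {
            "type": "model",
            "model": "meta-llama/Llama-3.1-405B-Instruct",  # illustrative model ID
            "sampling_params": {"strategy": {"type": "greedy"}, "max_tokens": 512},
        },
    },
)
print(response)
```

Note that only the parameter names change (`eval_task_id`/`task_id` become `benchmark_id`) and `evaluate_rows` gains an `_alpha` suffix; the surrounding arguments (`dataset_id`, `input_rows`, `scoring_functions`, `task_config`) are passed exactly as before.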