diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 381f37f1f..cba7829a1 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -81,7 +81,7 @@
                 "deprecated": true
             }
         },
-        "/v1/eval-tasks/{benchmark_id}": {
+        "/v1/eval-tasks/{task_id}": {
             "get": {
                 "responses": {
                     "200": {
@@ -109,7 +109,7 @@
             "parameters": [
                 {
                     "name": "task_id",
-                    "in": "query",
+                    "in": "path",
                     "required": true,
                     "schema": {
                         "type": "string"
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 83bc5483c..0bc498776 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -35,7 +35,7 @@ paths:
               $ref: '#/components/schemas/DeprecatedEvaluateRowsRequest'
         required: true
       deprecated: true
-  /v1/eval-tasks/{benchmark_id}:
+  /v1/eval-tasks/{task_id}:
     get:
       responses:
         '200':
@@ -51,7 +51,7 @@ paths:
           description: ''
       parameters:
       - name: task_id
-        in: query
+        in: path
         required: true
         schema:
           type: string
diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
index 857b7f133..8eecf84ab 100644
--- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
+++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
@@ -1017,14 +1017,14 @@
     "    \"content\": SYSTEM_PROMPT_TEMPLATE.format(subject=subset),\n",
     "}\n",
     "\n",
-    "client.benchmarks.register(\n",
-    "    benchmark_id=\"meta-reference::mmmu\",\n",
+    "client.eval_tasks.register(\n",
+    "    eval_task_id=\"meta-reference::mmmu\",\n",
     "    dataset_id=f\"mmmu-{subset}-{split}\",\n",
     "    scoring_functions=[\"basic::regex_parser_multiple_choice_answer\"],\n",
     ")\n",
     "\n",
     "response = client.eval.evaluate_rows(\n",
-    "    benchmark_id=\"meta-reference::mmmu\",\n",
+    "    task_id=\"meta-reference::mmmu\",\n",
     "    input_rows=eval_rows,\n",
     "    scoring_functions=[\"basic::regex_parser_multiple_choice_answer\"],\n",
     "    task_config={\n",
@@ -1196,8 +1196,8 @@
     "    provider_id=\"together\",\n",
     ")\n",
     "\n",
-    "client.benchmarks.register(\n",
-    "    benchmark_id=\"meta-reference::simpleqa\",\n",
+    "client.eval_tasks.register(\n",
+    "    eval_task_id=\"meta-reference::simpleqa\",\n",
     "    dataset_id=simpleqa_dataset_id,\n",
     "    scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
     ")\n",
@@ -1214,7 +1214,7 @@
     "        \"sampling_params\": {\n",
     "            \"strategy\": {\n",
     "                \"type\": \"greedy\",\n",
-    "            },b\n",
+    "            },\n",
     "            \"max_tokens\": 4096,\n",
     "            \"repeat_penalty\": 1.0,\n",
     "        },\n",
@@ -1352,7 +1352,7 @@
     "}\n",
     "\n",
     "response = client.eval.evaluate_rows(\n",
-    "    benchmark_id=\"meta-reference::simpleqa\",\n",
+    "    task_id=\"meta-reference::simpleqa\",\n",
     "    input_rows=eval_rows.rows,\n",
     "    scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
     "    task_config={\n",
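For reference, the renamed surface fits together as below. This is a minimal sketch, not a verbatim notebook excerpt: the base URL, dataset ID, judge model, and row count are illustrative placeholders, and the `datasetio.get_rows_paginated` call is assumed to match the client version the notebook targets. The `eval_tasks.register` / `eval.evaluate_rows` calls and their `eval_task_id` / `task_id` keywords come directly from the notebook changes above.

```python
# Sketch of the renamed eval flow under assumed setup; placeholders noted inline.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")  # placeholder URL

# Register an eval task under the new `eval_task_id` keyword.
client.eval_tasks.register(
    eval_task_id="meta-reference::simpleqa",
    dataset_id="simpleqa",  # illustrative; the notebook registers its own dataset
    scoring_functions=["llm-as-judge::405b-simpleqa"],
)

# Pull a few rows to score (assumed datasetio API for this client version).
eval_rows = client.datasetio.get_rows_paginated(
    dataset_id="simpleqa",
    rows_in_page=5,
)

# Evaluate with the renamed `task_id` keyword; task_config mirrors the notebook.
response = client.eval.evaluate_rows(
    task_id="meta-reference::simpleqa",
    input_rows=eval_rows.rows,
    scoring_functions=["llm-as-judge::405b-simpleqa"],
    task_config={
        "type": "benchmark",
        "eval_candidate": {
            "type": "model",
            "model": "meta-llama/Llama-3.1-405B-Instruct",  # illustrative model
            "sampling_params": {
                "strategy": {"type": "greedy"},
                "max_tokens": 4096,
                "repeat_penalty": 1.0,
            },
        },
    },
)
print(response)
```

Moving `task_id` from `in: query` to `in: path` also makes the parameter agree with the `{task_id}` template in the `/v1/eval-tasks/{task_id}` route, which is what generated clients key on when building the request URL.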