Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-19 03:10:03 +00:00)
chore: rename task_config to benchmark_config (#1397)

# What does this PR do?

- This rename was missed in the previous deprecation: https://github.com/meta-llama/llama-stack/pull/1186
- Part of https://github.com/meta-llama/llama-stack/issues/1396

## Test Plan

```
pytest -v -s --nbval-lax ./llama-stack/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
```
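At the call sites touched below only the keyword name changes; the `BenchmarkConfig` payload itself is unchanged. A minimal sketch with the Python client (assuming `client` is an initialized `LlamaStackClient` and `eval_rows` has already been fetched; the model id and sampling strategy are illustrative placeholders, not values from this diff):

```python
benchmark_config = {
    "type": "benchmark",
    "eval_candidate": {
        "type": "model",
        "model": "meta-llama/Llama-3.2-3B-Instruct",  # illustrative
        "sampling_params": {"strategy": {"type": "greedy"}},  # illustrative
    },
}

# Before this PR (deprecated keyword):
#   client.eval.evaluate_rows(..., task_config=benchmark_config)

# After this PR:
response = client.eval.evaluate_rows(
    benchmark_id="meta-reference::mmmu",
    input_rows=eval_rows,
    scoring_functions=["basic::regex_parser_multiple_choice_answer"],
    benchmark_config=benchmark_config,
)
```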
This commit is contained in:
parent 158b6dc404
commit e9a37bad63

12 changed files with 55 additions and 46 deletions
docs/_static/llama-stack-spec.html (vendored): 8 changed lines
@@ -6355,7 +6355,7 @@
               "type": "string"
             }
           },
-          "task_config": {
+          "benchmark_config": {
             "$ref": "#/components/schemas/BenchmarkConfig"
           }
         },
@@ -6363,7 +6363,7 @@
         "required": [
           "input_rows",
           "scoring_functions",
-          "task_config"
+          "benchmark_config"
         ],
         "title": "EvaluateRowsRequest"
       },
@@ -9248,13 +9248,13 @@
       "RunEvalRequest": {
         "type": "object",
         "properties": {
-          "task_config": {
+          "benchmark_config": {
             "$ref": "#/components/schemas/BenchmarkConfig"
           }
         },
         "additionalProperties": false,
         "required": [
-          "task_config"
+          "benchmark_config"
         ],
         "title": "RunEvalRequest"
       },
docs/_static/llama-stack-spec.yaml (vendored): 8 changed lines
@@ -4357,13 +4357,13 @@ components:
         type: array
         items:
           type: string
-      task_config:
+      benchmark_config:
         $ref: '#/components/schemas/BenchmarkConfig'
     additionalProperties: false
     required:
       - input_rows
       - scoring_functions
-      - task_config
+      - benchmark_config
     title: EvaluateRowsRequest
   EvaluateResponse:
     type: object
@@ -6168,11 +6168,11 @@ components:
   RunEvalRequest:
     type: object
     properties:
-      task_config:
+      benchmark_config:
         $ref: '#/components/schemas/BenchmarkConfig'
     additionalProperties: false
     required:
-      - task_config
+      - benchmark_config
     title: RunEvalRequest
   Job:
     type: object
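In both renamed schemas the property keeps its `$ref` to `BenchmarkConfig`; for `RunEvalRequest` it is now the only required field. A sketch of the resulting request body shape (the nested candidate fields are illustrative, mirroring the notebook snippets below rather than values from the spec excerpt above):

```python
# Hypothetical payload for a run-eval request after this change; only
# "benchmark_config" is required per the updated RunEvalRequest schema.
run_eval_body = {
    "benchmark_config": {
        "type": "benchmark",
        "eval_candidate": {
            "type": "model",
            "model": "meta-llama/Llama-3.2-3B-Instruct",  # illustrative
        },
    },
}
```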
@@ -3675,7 +3675,7 @@
     "    benchmark_id=\"llama3.2-3B-instruct:tax_eval\",\n",
     "    input_rows=eval_rows.rows,\n",
     "    scoring_functions=[\"braintrust::answer-similarity\"],\n",
-    "    task_config={\n",
+    "    benchmark_config={\n",
     "        \"type\": \"benchmark\",\n",
     "        \"eval_candidate\": {\n",
     "            \"type\": \"model\",\n",
@@ -6383,7 +6383,7 @@
     "    benchmark_id=\"Llama-3.2-3B-Instruct-sft-0:tax_eval\",\n",
     "    input_rows=eval_rows.rows,\n",
     "    scoring_functions=[\"braintrust::answer-similarity\"],\n",
-    "    task_config={\n",
+    "    benchmark_config={\n",
     "        \"type\": \"benchmark\",\n",
     "        \"eval_candidate\": {\n",
     "            \"type\": \"model\",\n",
@@ -781,7 +781,7 @@
     "    benchmark_id=\"meta-reference::mmmu\",\n",
     "    input_rows=eval_rows,\n",
     "    scoring_functions=[\"basic::regex_parser_multiple_choice_answer\"],\n",
-    "    task_config={\n",
+    "    benchmark_config={\n",
     "        \"type\": \"benchmark\",\n",
     "        \"eval_candidate\": {\n",
     "            \"type\": \"model\",\n",
@@ -960,7 +960,7 @@
     "    benchmark_id=\"meta-reference::simpleqa\",\n",
     "    input_rows=eval_rows.rows,\n",
     "    scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
-    "    task_config={\n",
+    "    benchmark_config={\n",
     "        \"type\": \"benchmark\",\n",
     "        \"eval_candidate\": {\n",
     "            \"type\": \"model\",\n",
@@ -1109,7 +1109,7 @@
     "    benchmark_id=\"meta-reference::simpleqa\",\n",
     "    input_rows=eval_rows.rows,\n",
     "    scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
-    "    task_config={\n",
+    "    benchmark_config={\n",
     "        \"type\": \"benchmark\",\n",
     "        \"eval_candidate\": {\n",
     "            \"type\": \"agent\",\n",
@@ -51,7 +51,7 @@ response = client.eval.evaluate_rows(
     benchmark_id="meta-reference::mmmu",
     input_rows=eval_rows,
     scoring_functions=["basic::regex_parser_multiple_choice_answer"],
-    task_config={
+    benchmark_config={
         "type": "benchmark",
         "eval_candidate": {
             "type": "model",
@@ -109,7 +109,7 @@ response = client.eval.evaluate_rows(
     benchmark_id="meta-reference::simpleqa",
     input_rows=eval_rows.rows,
     scoring_functions=["llm-as-judge::405b-simpleqa"],
-    task_config={
+    benchmark_config={
         "type": "benchmark",
         "eval_candidate": {
             "type": "model",
@@ -158,7 +158,7 @@ response = client.eval.evaluate_rows(
     benchmark_id="meta-reference::simpleqa",
     input_rows=eval_rows.rows,
     scoring_functions=["llm-as-judge::405b-simpleqa"],
-    task_config={
+    benchmark_config={
         "type": "benchmark",
         "eval_candidate": {
             "type": "agent",
@@ -19,7 +19,7 @@ response = client.benchmarks.register(
 # Run evaluation
 job = client.eval.run_eval(
     benchmark_id="my_eval",
-    task_config={
+    benchmark_config={
         "type": "app",
         "eval_candidate": {"type": "agent", "config": agent_config},
     },
@@ -87,7 +87,7 @@ response = client.eval.evaluate_rows(
     benchmark_id="meta-reference::mmmu",
     input_rows=eval_rows,
     scoring_functions=["basic::regex_parser_multiple_choice_answer"],
-    task_config={
+    benchmark_config={
         "type": "benchmark",
         "eval_candidate": {
             "type": "model",
@@ -145,7 +145,7 @@ response = client.eval.evaluate_rows(
     benchmark_id="meta-reference::simpleqa",
     input_rows=eval_rows.rows,
     scoring_functions=["llm-as-judge::405b-simpleqa"],
-    task_config={
+    benchmark_config={
         "type": "benchmark",
         "eval_candidate": {
             "type": "model",
@@ -195,7 +195,7 @@ response = client.eval.evaluate_rows(
     benchmark_id="meta-reference::simpleqa",
     input_rows=eval_rows.rows,
     scoring_functions=["llm-as-judge::405b-simpleqa"],
-    task_config={
+    benchmark_config={
         "type": "benchmark",
         "eval_candidate": {
             "type": "agent",