update

2025-08-10 20:19:22 +00:00 · 2025-02-13 09:48:52 -08:00 · 2025-02-13 09:48:52 -08:00 · 9ce00ede9b
commit 9ce00ede9b
parent ceff63130d
4 changed files with 18 additions and 58 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -2665,15 +2665,6 @@
            "EvalTask": {
                "type": "object",
                "properties": {
                    "identifier": {
                        "type": "string"
                    },
                    "provider_resource_id": {
                        "type": "string"
                    },
                    "provider_id": {
                        "type": "string"
                    },
                    "type": {
                        "type": "string",
                        "const": "eval_task",
@ -2682,53 +2673,23 @@
                    "dataset_id": {
                        "type": "string"
                    },
-                    "scoring_functions": {
+                    "config": {
-                        "type": "array",
+                        "$ref": "#/components/schemas/AgentConfig"
                        "items": {
                            "type": "string"
                        }
                    },
                    "metadata": {
                        "type": "object",
                        "additionalProperties": {
                            "oneOf": [
                                {
                                    "type": "null"
                                },
                                {
                                    "type": "boolean"
                                },
                                {
                                    "type": "number"
                                },
                                {
                                    "type": "string"
                                },
                                {
                                    "type": "array"
                                },
                                {
                                    "type": "object"
                                }
                            ]
                        }
                    }
                },
                "additionalProperties": false,
                "required": [
                    "identifier",
                    "provider_resource_id",
                    "provider_id",
                    "type",
-                    "dataset_id",
+                    "config"
                    "scoring_functions",
                    "metadata"
                ]
            },
            "ListEvalTasksResponse": {
                "type": "object",
                "properties": {
-                    "data": {
+                    "sampling_params": {
                        "$ref": "#/components/schemas/SamplingParams"
                    },
                    "input_shields": {
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/EvalTask"
@ -2768,7 +2729,7 @@
                    "input_shields": {
                        "type": "array",
                        "items": {
-                            "type": "string"
+                            "$ref": "#/components/schemas/ToolDef"
                        }
                    },
                    "output_shields": {
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -1616,12 +1616,6 @@ components:
    EvalTask:
      type: object
      properties:
        identifier:
          type: string
        provider_resource_id:
          type: string
        provider_id:
          type: string
        type:
          type: string
          const: eval_task
@ -1644,9 +1638,6 @@ components:
              - type: object
      additionalProperties: false
      required:
        - identifier
        - provider_resource_id
        - provider_id
        - type
        - dataset_id
        - scoring_functions
@ -1654,7 +1645,9 @@ components:
    ListEvalTasksResponse:
      type: object
      properties:
-        data:
+        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
        input_shields:
          type: array
          items:
            $ref: '#/components/schemas/EvalTask'
--- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
+++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
@ -1214,7 +1214,7 @@
        "            \"sampling_params\": {\n",
        "                \"strategy\": {\n",
        "                    \"type\": \"greedy\",\n",
-        "                },\n",
+        "                },b\n",
        "                \"max_tokens\": 4096,\n",
        "                \"repeat_penalty\": 1.0,\n",
        "            },\n",
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@ -472,16 +472,20 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
    async def DEPRECATED_list_eval_tasks(self) -> ListBenchmarksResponse:
        logger.warning("DEPRECATED: Use /eval/benchmarks instead")
        return await self.list_benchmarks()
        return await self.list_benchmarks()
    async def DEPRECATED_get_eval_task(
        self,
        task_id: str,
        eval_task_id: str,
    ) -> Optional[Benchmark]:
        logger.warning("DEPRECATED: Use /eval/benchmarks instead")
        return await self.get_benchmark(task_id)
        return await self.get_benchmark(eval_task_id)
    async def DEPRECATED_register_eval_task(
        self,
        task_id: str,
        eval_task_id: str,
        dataset_id: str,
        scoring_functions: List[str],
@ -490,6 +494,8 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        logger.warning("DEPRECATED: Use /eval/benchmarks instead")
        return await self.register_benchmark(
            benchmark_id=task_id,
        return await self.register_benchmark(
            benchmark_id=eval_task_id,
            dataset_id=dataset_id,