update

2025-08-10 20:19:22 +00:00 · 2025-02-13 09:48:52 -08:00 · 2025-02-13 09:48:52 -08:00 · 9ce00ede9b
commit 9ce00ede9b
parent ceff63130d
4 changed files with 18 additions and 58 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -2665,15 +2665,6 @@
            "EvalTask": {
                "type": "object",
                "properties": {
-                    "identifier": {
-                        "type": "string"
-                    },
-                    "provider_resource_id": {
-                        "type": "string"
-                    },
-                    "provider_id": {
-                        "type": "string"
-                    },
                    "type": {
                        "type": "string",
                        "const": "eval_task",
@ -2682,53 +2673,23 @@
                    "dataset_id": {
                        "type": "string"
                    },
-                    "scoring_functions": {
-                        "type": "array",
-                        "items": {
-                            "type": "string"
-                        }
-                    },
-                    "metadata": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "oneOf": [
-                                {
-                                    "type": "null"
-                                },
-                                {
-                                    "type": "boolean"
-                                },
-                                {
-                                    "type": "number"
-                                },
-                                {
-                                    "type": "string"
-                                },
-                                {
-                                    "type": "array"
-                                },
-                                {
-                                    "type": "object"
-                                }
-                            ]
-                        }
+                    "config": {
+                        "$ref": "#/components/schemas/AgentConfig"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "identifier",
-                    "provider_resource_id",
-                    "provider_id",
                    "type",
-                    "dataset_id",
-                    "scoring_functions",
-                    "metadata"
+                    "config"
                ]
            },
            "ListEvalTasksResponse": {
                "type": "object",
                "properties": {
-                    "data": {
+                    "sampling_params": {
+                        "$ref": "#/components/schemas/SamplingParams"
+                    },
+                    "input_shields": {
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/EvalTask"
@ -2768,7 +2729,7 @@
                    "input_shields": {
                        "type": "array",
                        "items": {
-                            "type": "string"
+                            "$ref": "#/components/schemas/ToolDef"
                        }
                    },
                    "output_shields": {
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -1616,12 +1616,6 @@ components:
    EvalTask:
      type: object
      properties:
-        identifier:
-          type: string
-        provider_resource_id:
-          type: string
-        provider_id:
-          type: string
        type:
          type: string
          const: eval_task
@ -1644,9 +1638,6 @@ components:
              - type: object
      additionalProperties: false
      required:
-        - identifier
-        - provider_resource_id
-        - provider_id
        - type
        - dataset_id
        - scoring_functions
@ -1654,7 +1645,9 @@ components:
    ListEvalTasksResponse:
      type: object
      properties:
-        data:
+        sampling_params:
+          $ref: '#/components/schemas/SamplingParams'
+        input_shields:
          type: array
          items:
            $ref: '#/components/schemas/EvalTask'
--- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
+++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
@ -1214,7 +1214,7 @@
        "            \"sampling_params\": {\n",
        "                \"strategy\": {\n",
        "                    \"type\": \"greedy\",\n",
-        "                },\n",
+        "                },b\n",
        "                \"max_tokens\": 4096,\n",
        "                \"repeat_penalty\": 1.0,\n",
        "            },\n",
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@ -472,16 +472,20 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
    async def DEPRECATED_list_eval_tasks(self) -> ListBenchmarksResponse:
        logger.warning("DEPRECATED: Use /eval/benchmarks instead")
        return await self.list_benchmarks()
+        return await self.list_benchmarks()

    async def DEPRECATED_get_eval_task(
        self,
+        task_id: str,
        eval_task_id: str,
    ) -> Optional[Benchmark]:
        logger.warning("DEPRECATED: Use /eval/benchmarks instead")
+        return await self.get_benchmark(task_id)
        return await self.get_benchmark(eval_task_id)

    async def DEPRECATED_register_eval_task(
        self,
+        task_id: str,
        eval_task_id: str,
        dataset_id: str,
        scoring_functions: List[str],
@ -490,6 +494,8 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        logger.warning("DEPRECATED: Use /eval/benchmarks instead")
+        return await self.register_benchmark(
+            benchmark_id=task_id,
        return await self.register_benchmark(
            benchmark_id=eval_task_id,
            dataset_id=dataset_id,