Remove benchmark_id from the eval job route paths (now passed as a query parameter or in the request body)

2025-12-31 07:59:59 +00:00 · 2025-03-12 01:45:50 -07:00 · 2025-03-12 01:45:50 -07:00 · fecfb32855
commit fecfb32855
parent deb24f5abe
3 changed files with 43 additions and 50 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -230,7 +230,7 @@
                }
            }
        },
-        "/v1/eval/benchmark/{benchmark_id}/jobs/{job_id}": {
+        "/v1/eval/jobs/{job_id}": {
            "get": {
                "responses": {
                    "200": {
@ -269,18 +269,18 @@
                "description": "Get the EvalJob object for a given job id and benchmark id.",
                "parameters": [
                    {
-                        "name": "benchmark_id",
+                        "name": "job_id",
                        "in": "path",
-                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "description": "The ID of the job to get the status of.",
                        "required": true,
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
-                        "name": "job_id",
-                        "in": "path",
-                        "description": "The ID of the job to get the status of.",
+                        "name": "benchmark_id",
+                        "in": "query",
+                        "description": "The ID of the benchmark to run the evaluation on.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -312,18 +312,18 @@
                "description": "Cancel a job.",
                "parameters": [
                    {
-                        "name": "benchmark_id",
+                        "name": "job_id",
                        "in": "path",
-                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "description": "The ID of the job to cancel.",
                        "required": true,
                        "schema": {
                            "type": "string"
                        }
                    },
                    {
-                        "name": "job_id",
-                        "in": "path",
-                        "description": "The ID of the job to cancel.",
+                        "name": "benchmark_id",
+                        "in": "query",
+                        "description": "The ID of the benchmark to run the evaluation on.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -1070,7 +1070,7 @@
                }
            }
        },
-        "/v1/eval/benchmark/{benchmark_id}/jobs": {
+        "/v1/eval/jobs": {
            "post": {
                "responses": {
                    "200": {
@ -1100,17 +1100,7 @@
                    "Eval"
                ],
                "description": "Run an evaluation on a benchmark.",
-                "parameters": [
-                    {
-                        "name": "benchmark_id",
-                        "in": "path",
-                        "description": "The ID of the benchmark to run the evaluation on.",
-                        "required": true,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ],
+                "parameters": [],
                "requestBody": {
                    "content": {
                        "application/json": {
@ -6335,6 +6325,10 @@
            "EvaluateBenchmarkRequest": {
                "type": "object",
                "properties": {
+                    "benchmark_id": {
+                        "type": "string",
+                        "description": "The ID of the benchmark to run the evaluation on."
+                    },
                    "candidate": {
                        "$ref": "#/components/schemas/EvalCandidate",
                        "description": "Candidate to evaluate on. - { \"type\": \"model\", \"model\": \"Llama-3.1-8B-Instruct\", \"sampling_params\": {...}, \"system_message\": \"You are a helpful assistant.\", } - { \"type\": \"agent\", \"config\": {...}, }"
@ -6342,6 +6336,7 @@
                },
                "additionalProperties": false,
                "required": [
+                    "benchmark_id",
                    "candidate"
                ],
                "title": "EvaluateBenchmarkRequest"
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -142,7 +142,7 @@ paths:
            schema:
              $ref: '#/components/schemas/BatchCompletionRequest'
        required: true
-  /v1/eval/benchmark/{benchmark_id}/jobs/{job_id}:
+  /v1/eval/jobs/{job_id}:
    get:
      responses:
        '200':
@ -168,19 +168,19 @@ paths:
      description: >-
        Get the EvalJob object for a given job id and benchmark id.
      parameters:
-        - name: benchmark_id
-          in: path
-          description: >-
-            The ID of the benchmark to run the evaluation on.
-          required: true
-          schema:
-            type: string
        - name: job_id
          in: path
          description: The ID of the job to get the status of.
          required: true
          schema:
            type: string
+        - name: benchmark_id
+          in: query
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
    delete:
      responses:
        '200':
@ -199,19 +199,19 @@ paths:
        - Scoring
      description: Cancel a job.
      parameters:
-        - name: benchmark_id
-          in: path
-          description: >-
-            The ID of the benchmark to run the evaluation on.
-          required: true
-          schema:
-            type: string
        - name: job_id
          in: path
          description: The ID of the job to cancel.
          required: true
          schema:
            type: string
+        - name: benchmark_id
+          in: query
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
  /v1/post-training/job/cancel:
    post:
      responses:
@ -736,7 +736,7 @@ paths:
            schema:
              $ref: '#/components/schemas/EmbeddingsRequest'
        required: true
-  /v1/eval/benchmark/{benchmark_id}/jobs:
+  /v1/eval/jobs:
    post:
      responses:
        '200':
@ -759,14 +759,7 @@ paths:
      tags:
        - Eval
      description: Run an evaluation on a benchmark.
-      parameters:
-        - name: benchmark_id
-          in: path
-          description: >-
-            The ID of the benchmark to run the evaluation on.
-          required: true
-          schema:
-            type: string
+      parameters: []
      requestBody:
        content:
          application/json:
@ -4410,6 +4403,10 @@ components:
    EvaluateBenchmarkRequest:
      type: object
      properties:
+        benchmark_id:
+          type: string
+          description: >-
+            The ID of the benchmark to run the evaluation on.
        candidate:
          $ref: '#/components/schemas/EvalCandidate'
          description: >-
@ -4418,6 +4415,7 @@ components:
            } - { "type": "agent", "config": {...}, }
      additionalProperties: false
      required:
+        - benchmark_id
        - candidate
      title: EvaluateBenchmarkRequest
    EvalJob:
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@ -94,7 +94,7 @@ class EvalJob(CommonJobFields):
 class Eval(Protocol):
    """Llama Stack Evaluation API for running evaluations on model and agent candidates."""

-    @webmethod(route="/eval/benchmark/{benchmark_id}/jobs", method="POST")
+    @webmethod(route="/eval/jobs", method="POST")
    async def evaluate_benchmark(
        self,
        benchmark_id: str,
@ -132,7 +132,7 @@ class Eval(Protocol):
        :return: EvaluateResponse object containing generations and scores
        """

-    @webmethod(route="/eval/benchmark/{benchmark_id}/jobs/{job_id}", method="GET")
+    @webmethod(route="/eval/jobs/{job_id}", method="GET")
    async def get_job(self, benchmark_id: str, job_id: str) -> Optional[EvalJob]:
        """Get the EvalJob object for a given job id and benchmark id.

@ -142,7 +142,7 @@ class Eval(Protocol):
        """
        ...

-    @webmethod(route="/eval/benchmark/{benchmark_id}/jobs/{job_id}", method="DELETE")
+    @webmethod(route="/eval/jobs/{job_id}", method="DELETE")
    async def cancel_job(self, benchmark_id: str, job_id: str) -> None:
        """Cancel a job.