From fecfb32855d0b78b8d8943a210221f514e15fc77 Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Wed, 12 Mar 2025 01:45:50 -0700
Subject: [PATCH] remove benchmark_id from eval route paths

---
 docs/_static/llama-stack-spec.html | 41 ++++++++++++--------------
 docs/_static/llama-stack-spec.yaml | 46 ++++++++++++++----------------
 llama_stack/apis/eval/eval.py      |  6 ++--
 3 files changed, 43 insertions(+), 50 deletions(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 1ef5effef..64a40ecb1 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -230,7 +230,7 @@
                 }
             }
         },
-        "/v1/eval/benchmark/{benchmark_id}/jobs/{job_id}": {
+        "/v1/eval/jobs/{job_id}": {
             "get": {
                 "responses": {
                     "200": {
@@ -269,18 +269,18 @@
                 "description": "Get the EvalJob object for a given job id and benchmark id.",
                 "parameters": [
                     {
-                        "name": "benchmark_id",
+                        "name": "job_id",
                         "in": "path",
-                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "description": "The ID of the job to get the status of.",
                         "required": true,
                         "schema": {
                             "type": "string"
                         }
                     },
                     {
-                        "name": "job_id",
-                        "in": "path",
-                        "description": "The ID of the job to get the status of.",
+                        "name": "benchmark_id",
+                        "in": "query",
+                        "description": "The ID of the benchmark to run the evaluation on.",
                         "required": true,
                         "schema": {
                             "type": "string"
@@ -312,18 +312,18 @@
                 "description": "Cancel a job.",
                 "parameters": [
                     {
-                        "name": "benchmark_id",
+                        "name": "job_id",
                         "in": "path",
-                        "description": "The ID of the benchmark to run the evaluation on.",
+                        "description": "The ID of the job to cancel.",
                         "required": true,
                         "schema": {
                             "type": "string"
                         }
                     },
                     {
-                        "name": "job_id",
-                        "in": "path",
-                        "description": "The ID of the job to cancel.",
+                        "name": "benchmark_id",
+                        "in": "query",
+                        "description": "The ID of the benchmark to run the evaluation on.",
                         "required": true,
                         "schema": {
                             "type": "string"
@@ -1070,7 +1070,7 @@
                 }
             }
         },
-        "/v1/eval/benchmark/{benchmark_id}/jobs": {
+        "/v1/eval/jobs": {
             "post": {
                 "responses": {
                     "200": {
@@ -1100,17 +1100,7 @@
                     "Eval"
                 ],
                 "description": "Run an evaluation on a benchmark.",
-                "parameters": [
-                    {
-                        "name": "benchmark_id",
-                        "in": "path",
-                        "description": "The ID of the benchmark to run the evaluation on.",
-                        "required": true,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ],
+                "parameters": [],
                 "requestBody": {
                     "content": {
                         "application/json": {
@@ -6335,6 +6325,10 @@
             "EvaluateBenchmarkRequest": {
                 "type": "object",
                 "properties": {
+                    "benchmark_id": {
+                        "type": "string",
+                        "description": "The ID of the benchmark to run the evaluation on."
+                    },
                     "candidate": {
                         "$ref": "#/components/schemas/EvalCandidate",
                         "description": "Candidate to evaluate on. - { \"type\": \"model\", \"model\": \"Llama-3.1-8B-Instruct\", \"sampling_params\": {...}, \"system_message\": \"You are a helpful assistant.\", } - { \"type\": \"agent\", \"config\": {...}, }"
@@ -6342,6 +6336,7 @@
                 },
                 "additionalProperties": false,
                 "required": [
+                    "benchmark_id",
                     "candidate"
                 ],
                 "title": "EvaluateBenchmarkRequest"
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 7f5b96051..7d6961208 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -142,7 +142,7 @@ paths:
             schema:
               $ref: '#/components/schemas/BatchCompletionRequest'
         required: true
-  /v1/eval/benchmark/{benchmark_id}/jobs/{job_id}:
+  /v1/eval/jobs/{job_id}:
     get:
       responses:
         '200':
@@ -168,19 +168,19 @@ paths:
       description: >-
         Get the EvalJob object for a given job id and benchmark id.
       parameters:
-        - name: benchmark_id
-          in: path
-          description: >-
-            The ID of the benchmark to run the evaluation on.
-          required: true
-          schema:
-            type: string
         - name: job_id
           in: path
           description: The ID of the job to get the status of.
           required: true
           schema:
             type: string
+        - name: benchmark_id
+          in: query
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
     delete:
       responses:
         '200':
@@ -199,19 +199,19 @@ paths:
         - Scoring
       description: Cancel a job.
       parameters:
-        - name: benchmark_id
-          in: path
-          description: >-
-            The ID of the benchmark to run the evaluation on.
-          required: true
-          schema:
-            type: string
         - name: job_id
           in: path
           description: The ID of the job to cancel.
           required: true
           schema:
             type: string
+        - name: benchmark_id
+          in: query
+          description: >-
+            The ID of the benchmark to run the evaluation on.
+          required: true
+          schema:
+            type: string
   /v1/post-training/job/cancel:
     post:
       responses:
@@ -736,7 +736,7 @@ paths:
             schema:
               $ref: '#/components/schemas/EmbeddingsRequest'
         required: true
-  /v1/eval/benchmark/{benchmark_id}/jobs:
+  /v1/eval/jobs:
     post:
       responses:
         '200':
@@ -759,14 +759,7 @@ paths:
       tags:
         - Eval
       description: Run an evaluation on a benchmark.
-      parameters:
-        - name: benchmark_id
-          in: path
-          description: >-
-            The ID of the benchmark to run the evaluation on.
-          required: true
-          schema:
-            type: string
+      parameters: []
       requestBody:
         content:
           application/json:
@@ -4410,6 +4403,10 @@ components:
     EvaluateBenchmarkRequest:
       type: object
       properties:
+        benchmark_id:
+          type: string
+          description: >-
+            The ID of the benchmark to run the evaluation on.
         candidate:
           $ref: '#/components/schemas/EvalCandidate'
           description: >-
@@ -4418,6 +4415,7 @@ components:
             } - { "type": "agent", "config": {...}, }
       additionalProperties: false
       required:
+        - benchmark_id
         - candidate
       title: EvaluateBenchmarkRequest
     EvalJob:
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index e39d91756..8865e2cd6 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -94,7 +94,7 @@ class EvalJob(CommonJobFields):
 class Eval(Protocol):
     """Llama Stack Evaluation API for running evaluations on model and agent candidates."""
 
-    @webmethod(route="/eval/benchmark/{benchmark_id}/jobs", method="POST")
+    @webmethod(route="/eval/jobs", method="POST")
     async def evaluate_benchmark(
         self,
         benchmark_id: str,
@@ -132,7 +132,7 @@ class Eval(Protocol):
         :return: EvaluateResponse object containing generations and scores
         """
 
-    @webmethod(route="/eval/benchmark/{benchmark_id}/jobs/{job_id}", method="GET")
+    @webmethod(route="/eval/jobs/{job_id}", method="GET")
     async def get_job(self, benchmark_id: str, job_id: str) -> Optional[EvalJob]:
         """Get the EvalJob object for a given job id and benchmark id.
 
@@ -142,7 +142,7 @@ class Eval(Protocol):
         """
         ...
 
-    @webmethod(route="/eval/benchmark/{benchmark_id}/jobs/{job_id}", method="DELETE")
+    @webmethod(route="/eval/jobs/{job_id}", method="DELETE")
     async def cancel_job(self, benchmark_id: str, job_id: str) -> None:
         """Cancel a job.