diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 1ef5effef..64a40ecb1 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -230,7 +230,7 @@
}
}
},
- "/v1/eval/benchmark/{benchmark_id}/jobs/{job_id}": {
+ "/v1/eval/jobs/{job_id}": {
"get": {
"responses": {
"200": {
@@ -269,18 +269,18 @@
"description": "Get the EvalJob object for a given job id and benchmark id.",
"parameters": [
{
- "name": "benchmark_id",
+ "name": "job_id",
"in": "path",
- "description": "The ID of the benchmark to run the evaluation on.",
+ "description": "The ID of the job to get the status of.",
"required": true,
"schema": {
"type": "string"
}
},
{
- "name": "job_id",
- "in": "path",
- "description": "The ID of the job to get the status of.",
+ "name": "benchmark_id",
+ "in": "query",
+ "description": "The ID of the benchmark the job belongs to.",
"required": true,
"schema": {
"type": "string"
@@ -312,18 +312,18 @@
"description": "Cancel a job.",
"parameters": [
{
- "name": "benchmark_id",
+ "name": "job_id",
"in": "path",
- "description": "The ID of the benchmark to run the evaluation on.",
+ "description": "The ID of the job to cancel.",
"required": true,
"schema": {
"type": "string"
}
},
{
- "name": "job_id",
- "in": "path",
- "description": "The ID of the job to cancel.",
+ "name": "benchmark_id",
+ "in": "query",
+ "description": "The ID of the benchmark the job belongs to.",
"required": true,
"schema": {
"type": "string"
@@ -1070,7 +1070,7 @@
}
}
},
- "/v1/eval/benchmark/{benchmark_id}/jobs": {
+ "/v1/eval/jobs": {
"post": {
"responses": {
"200": {
@@ -1100,17 +1100,7 @@
"Eval"
],
"description": "Run an evaluation on a benchmark.",
- "parameters": [
- {
- "name": "benchmark_id",
- "in": "path",
- "description": "The ID of the benchmark to run the evaluation on.",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
+ "parameters": [],
"requestBody": {
"content": {
"application/json": {
@@ -6335,6 +6325,10 @@
"EvaluateBenchmarkRequest": {
"type": "object",
"properties": {
+ "benchmark_id": {
+ "type": "string",
+ "description": "The ID of the benchmark to run the evaluation on."
+ },
"candidate": {
"$ref": "#/components/schemas/EvalCandidate",
"description": "Candidate to evaluate on. - { \"type\": \"model\", \"model\": \"Llama-3.1-8B-Instruct\", \"sampling_params\": {...}, \"system_message\": \"You are a helpful assistant.\", } - { \"type\": \"agent\", \"config\": {...}, }"
@@ -6342,6 +6336,7 @@
},
"additionalProperties": false,
"required": [
+ "benchmark_id",
"candidate"
],
"title": "EvaluateBenchmarkRequest"
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 7f5b96051..7d6961208 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -142,7 +142,7 @@ paths:
schema:
$ref: '#/components/schemas/BatchCompletionRequest'
required: true
- /v1/eval/benchmark/{benchmark_id}/jobs/{job_id}:
+ /v1/eval/jobs/{job_id}:
get:
responses:
'200':
@@ -168,19 +168,19 @@ paths:
description: >-
Get the EvalJob object for a given job id and benchmark id.
parameters:
- - name: benchmark_id
- in: path
- description: >-
- The ID of the benchmark to run the evaluation on.
- required: true
- schema:
- type: string
- name: job_id
in: path
description: The ID of the job to get the status of.
required: true
schema:
type: string
+ - name: benchmark_id
+ in: query
+ description: >-
+ The ID of the benchmark the job belongs to.
+ required: true
+ schema:
+ type: string
delete:
responses:
'200':
@@ -199,19 +199,19 @@ paths:
- Scoring
description: Cancel a job.
parameters:
- - name: benchmark_id
- in: path
- description: >-
- The ID of the benchmark to run the evaluation on.
- required: true
- schema:
- type: string
- name: job_id
in: path
description: The ID of the job to cancel.
required: true
schema:
type: string
+ - name: benchmark_id
+ in: query
+ description: >-
+ The ID of the benchmark the job belongs to.
+ required: true
+ schema:
+ type: string
/v1/post-training/job/cancel:
post:
responses:
@@ -736,7 +736,7 @@ paths:
schema:
$ref: '#/components/schemas/EmbeddingsRequest'
required: true
- /v1/eval/benchmark/{benchmark_id}/jobs:
+ /v1/eval/jobs:
post:
responses:
'200':
@@ -759,14 +759,7 @@ paths:
tags:
- Eval
description: Run an evaluation on a benchmark.
- parameters:
- - name: benchmark_id
- in: path
- description: >-
- The ID of the benchmark to run the evaluation on.
- required: true
- schema:
- type: string
+ parameters: []
requestBody:
content:
application/json:
@@ -4410,6 +4403,10 @@ components:
EvaluateBenchmarkRequest:
type: object
properties:
+ benchmark_id:
+ type: string
+ description: >-
+ The ID of the benchmark to run the evaluation on.
candidate:
$ref: '#/components/schemas/EvalCandidate'
description: >-
@@ -4418,6 +4415,7 @@ components:
} - { "type": "agent", "config": {...}, }
additionalProperties: false
required:
+ - benchmark_id
- candidate
title: EvaluateBenchmarkRequest
EvalJob:
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index e39d91756..8865e2cd6 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -94,7 +94,7 @@ class EvalJob(CommonJobFields):
class Eval(Protocol):
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
- @webmethod(route="/eval/benchmark/{benchmark_id}/jobs", method="POST")
+ @webmethod(route="/eval/jobs", method="POST")
async def evaluate_benchmark(
self,
benchmark_id: str,
@@ -132,7 +132,7 @@ class Eval(Protocol):
:return: EvaluateResponse object containing generations and scores
"""
- @webmethod(route="/eval/benchmark/{benchmark_id}/jobs/{job_id}", method="GET")
+ @webmethod(route="/eval/jobs/{job_id}", method="GET")
async def get_job(self, benchmark_id: str, job_id: str) -> Optional[EvalJob]:
"""Get the EvalJob object for a given job id and benchmark id.
@@ -142,7 +142,7 @@ class Eval(Protocol):
"""
...
- @webmethod(route="/eval/benchmark/{benchmark_id}/jobs/{job_id}", method="DELETE")
+ @webmethod(route="/eval/jobs/{job_id}", method="DELETE")
async def cancel_job(self, benchmark_id: str, job_id: str) -> None:
"""Cancel a job.