Remove benchmark_id from the eval route paths

This commit is contained in:
Xi Yan 2025-03-12 01:45:50 -07:00
parent deb24f5abe
commit fecfb32855
3 changed files with 43 additions and 50 deletions

View file

@ -230,7 +230,7 @@
} }
} }
}, },
"/v1/eval/benchmark/{benchmark_id}/jobs/{job_id}": { "/v1/eval/jobs/{job_id}": {
"get": { "get": {
"responses": { "responses": {
"200": { "200": {
@ -269,18 +269,18 @@
"description": "Get the EvalJob object for a given job id and benchmark id.", "description": "Get the EvalJob object for a given job id and benchmark id.",
"parameters": [ "parameters": [
{ {
"name": "benchmark_id", "name": "job_id",
"in": "path", "in": "path",
"description": "The ID of the benchmark to run the evaluation on.", "description": "The ID of the job to get the status of.",
"required": true, "required": true,
"schema": { "schema": {
"type": "string" "type": "string"
} }
}, },
{ {
"name": "job_id", "name": "benchmark_id",
"in": "path", "in": "query",
"description": "The ID of the job to get the status of.", "description": "The ID of the benchmark to run the evaluation on.",
"required": true, "required": true,
"schema": { "schema": {
"type": "string" "type": "string"
@ -312,18 +312,18 @@
"description": "Cancel a job.", "description": "Cancel a job.",
"parameters": [ "parameters": [
{ {
"name": "benchmark_id", "name": "job_id",
"in": "path", "in": "path",
"description": "The ID of the benchmark to run the evaluation on.", "description": "The ID of the job to cancel.",
"required": true, "required": true,
"schema": { "schema": {
"type": "string" "type": "string"
} }
}, },
{ {
"name": "job_id", "name": "benchmark_id",
"in": "path", "in": "query",
"description": "The ID of the job to cancel.", "description": "The ID of the benchmark to run the evaluation on.",
"required": true, "required": true,
"schema": { "schema": {
"type": "string" "type": "string"
@ -1070,7 +1070,7 @@
} }
} }
}, },
"/v1/eval/benchmark/{benchmark_id}/jobs": { "/v1/eval/jobs": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -1100,17 +1100,7 @@
"Eval" "Eval"
], ],
"description": "Run an evaluation on a benchmark.", "description": "Run an evaluation on a benchmark.",
"parameters": [ "parameters": [],
{
"name": "benchmark_id",
"in": "path",
"description": "The ID of the benchmark to run the evaluation on.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": { "requestBody": {
"content": { "content": {
"application/json": { "application/json": {
@ -6335,6 +6325,10 @@
"EvaluateBenchmarkRequest": { "EvaluateBenchmarkRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"benchmark_id": {
"type": "string",
"description": "The ID of the benchmark to run the evaluation on."
},
"candidate": { "candidate": {
"$ref": "#/components/schemas/EvalCandidate", "$ref": "#/components/schemas/EvalCandidate",
"description": "Candidate to evaluate on. - { \"type\": \"model\", \"model\": \"Llama-3.1-8B-Instruct\", \"sampling_params\": {...}, \"system_message\": \"You are a helpful assistant.\", } - { \"type\": \"agent\", \"config\": {...}, }" "description": "Candidate to evaluate on. - { \"type\": \"model\", \"model\": \"Llama-3.1-8B-Instruct\", \"sampling_params\": {...}, \"system_message\": \"You are a helpful assistant.\", } - { \"type\": \"agent\", \"config\": {...}, }"
@ -6342,6 +6336,7 @@
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"benchmark_id",
"candidate" "candidate"
], ],
"title": "EvaluateBenchmarkRequest" "title": "EvaluateBenchmarkRequest"

View file

@ -142,7 +142,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/BatchCompletionRequest' $ref: '#/components/schemas/BatchCompletionRequest'
required: true required: true
/v1/eval/benchmark/{benchmark_id}/jobs/{job_id}: /v1/eval/jobs/{job_id}:
get: get:
responses: responses:
'200': '200':
@ -168,19 +168,19 @@ paths:
description: >- description: >-
Get the EvalJob object for a given job id and benchmark id. Get the EvalJob object for a given job id and benchmark id.
parameters: parameters:
- name: benchmark_id
in: path
description: >-
The ID of the benchmark to run the evaluation on.
required: true
schema:
type: string
- name: job_id - name: job_id
in: path in: path
description: The ID of the job to get the status of. description: The ID of the job to get the status of.
required: true required: true
schema: schema:
type: string type: string
- name: benchmark_id
in: query
description: >-
The ID of the benchmark to run the evaluation on.
required: true
schema:
type: string
delete: delete:
responses: responses:
'200': '200':
@ -199,19 +199,19 @@ paths:
- Scoring - Scoring
description: Cancel a job. description: Cancel a job.
parameters: parameters:
- name: benchmark_id
in: path
description: >-
The ID of the benchmark to run the evaluation on.
required: true
schema:
type: string
- name: job_id - name: job_id
in: path in: path
description: The ID of the job to cancel. description: The ID of the job to cancel.
required: true required: true
schema: schema:
type: string type: string
- name: benchmark_id
in: query
description: >-
The ID of the benchmark to run the evaluation on.
required: true
schema:
type: string
/v1/post-training/job/cancel: /v1/post-training/job/cancel:
post: post:
responses: responses:
@ -736,7 +736,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/EmbeddingsRequest' $ref: '#/components/schemas/EmbeddingsRequest'
required: true required: true
/v1/eval/benchmark/{benchmark_id}/jobs: /v1/eval/jobs:
post: post:
responses: responses:
'200': '200':
@ -759,14 +759,7 @@ paths:
tags: tags:
- Eval - Eval
description: Run an evaluation on a benchmark. description: Run an evaluation on a benchmark.
parameters: parameters: []
- name: benchmark_id
in: path
description: >-
The ID of the benchmark to run the evaluation on.
required: true
schema:
type: string
requestBody: requestBody:
content: content:
application/json: application/json:
@ -4410,6 +4403,10 @@ components:
EvaluateBenchmarkRequest: EvaluateBenchmarkRequest:
type: object type: object
properties: properties:
benchmark_id:
type: string
description: >-
The ID of the benchmark to run the evaluation on.
candidate: candidate:
$ref: '#/components/schemas/EvalCandidate' $ref: '#/components/schemas/EvalCandidate'
description: >- description: >-
@ -4418,6 +4415,7 @@ components:
} - { "type": "agent", "config": {...}, } } - { "type": "agent", "config": {...}, }
additionalProperties: false additionalProperties: false
required: required:
- benchmark_id
- candidate - candidate
title: EvaluateBenchmarkRequest title: EvaluateBenchmarkRequest
EvalJob: EvalJob:

View file

@ -94,7 +94,7 @@ class EvalJob(CommonJobFields):
class Eval(Protocol): class Eval(Protocol):
"""Llama Stack Evaluation API for running evaluations on model and agent candidates.""" """Llama Stack Evaluation API for running evaluations on model and agent candidates."""
@webmethod(route="/eval/benchmark/{benchmark_id}/jobs", method="POST") @webmethod(route="/eval/jobs", method="POST")
async def evaluate_benchmark( async def evaluate_benchmark(
self, self,
benchmark_id: str, benchmark_id: str,
@ -132,7 +132,7 @@ class Eval(Protocol):
:return: EvaluateResponse object containing generations and scores :return: EvaluateResponse object containing generations and scores
""" """
@webmethod(route="/eval/benchmark/{benchmark_id}/jobs/{job_id}", method="GET") @webmethod(route="/eval/jobs/{job_id}", method="GET")
async def get_job(self, benchmark_id: str, job_id: str) -> Optional[EvalJob]: async def get_job(self, benchmark_id: str, job_id: str) -> Optional[EvalJob]:
"""Get the EvalJob object for a given job id and benchmark id. """Get the EvalJob object for a given job id and benchmark id.
@ -142,7 +142,7 @@ class Eval(Protocol):
""" """
... ...
@webmethod(route="/eval/benchmark/{benchmark_id}/jobs/{job_id}", method="DELETE") @webmethod(route="/eval/jobs/{job_id}", method="DELETE")
async def cancel_job(self, benchmark_id: str, job_id: str) -> None: async def cancel_job(self, benchmark_id: str, job_id: str) -> None:
"""Cancel a job. """Cancel a job.