mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-12 13:00:39 +00:00
remove benchmark_id in eval path
This commit is contained in:
parent
deb24f5abe
commit
fecfb32855
3 changed files with 43 additions and 50 deletions
41
docs/_static/llama-stack-spec.html
vendored
41
docs/_static/llama-stack-spec.html
vendored
|
@ -230,7 +230,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/v1/eval/benchmark/{benchmark_id}/jobs/{job_id}": {
|
"/v1/eval/jobs/{job_id}": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -269,18 +269,18 @@
|
||||||
"description": "Get the EvalJob object for a given job id and benchmark id.",
|
"description": "Get the EvalJob object for a given job id and benchmark id.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "benchmark_id",
|
"name": "job_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
"description": "The ID of the benchmark to run the evaluation on.",
|
"description": "The ID of the job to get the status of.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "job_id",
|
"name": "benchmark_id",
|
||||||
"in": "path",
|
"in": "query",
|
||||||
"description": "The ID of the job to get the status of.",
|
"description": "The ID of the benchmark to run the evaluation on.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -312,18 +312,18 @@
|
||||||
"description": "Cancel a job.",
|
"description": "Cancel a job.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "benchmark_id",
|
"name": "job_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
"description": "The ID of the benchmark to run the evaluation on.",
|
"description": "The ID of the job to cancel.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "job_id",
|
"name": "benchmark_id",
|
||||||
"in": "path",
|
"in": "query",
|
||||||
"description": "The ID of the job to cancel.",
|
"description": "The ID of the benchmark to run the evaluation on.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -1070,7 +1070,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/v1/eval/benchmark/{benchmark_id}/jobs": {
|
"/v1/eval/jobs": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -1100,17 +1100,7 @@
|
||||||
"Eval"
|
"Eval"
|
||||||
],
|
],
|
||||||
"description": "Run an evaluation on a benchmark.",
|
"description": "Run an evaluation on a benchmark.",
|
||||||
"parameters": [
|
"parameters": [],
|
||||||
{
|
|
||||||
"name": "benchmark_id",
|
|
||||||
"in": "path",
|
|
||||||
"description": "The ID of the benchmark to run the evaluation on.",
|
|
||||||
"required": true,
|
|
||||||
"schema": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
|
@ -6335,6 +6325,10 @@
|
||||||
"EvaluateBenchmarkRequest": {
|
"EvaluateBenchmarkRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
"benchmark_id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The ID of the benchmark to run the evaluation on."
|
||||||
|
},
|
||||||
"candidate": {
|
"candidate": {
|
||||||
"$ref": "#/components/schemas/EvalCandidate",
|
"$ref": "#/components/schemas/EvalCandidate",
|
||||||
"description": "Candidate to evaluate on. - { \"type\": \"model\", \"model\": \"Llama-3.1-8B-Instruct\", \"sampling_params\": {...}, \"system_message\": \"You are a helpful assistant.\", } - { \"type\": \"agent\", \"config\": {...}, }"
|
"description": "Candidate to evaluate on. - { \"type\": \"model\", \"model\": \"Llama-3.1-8B-Instruct\", \"sampling_params\": {...}, \"system_message\": \"You are a helpful assistant.\", } - { \"type\": \"agent\", \"config\": {...}, }"
|
||||||
|
@ -6342,6 +6336,7 @@
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
|
"benchmark_id",
|
||||||
"candidate"
|
"candidate"
|
||||||
],
|
],
|
||||||
"title": "EvaluateBenchmarkRequest"
|
"title": "EvaluateBenchmarkRequest"
|
||||||
|
|
46
docs/_static/llama-stack-spec.yaml
vendored
46
docs/_static/llama-stack-spec.yaml
vendored
|
@ -142,7 +142,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/BatchCompletionRequest'
|
$ref: '#/components/schemas/BatchCompletionRequest'
|
||||||
required: true
|
required: true
|
||||||
/v1/eval/benchmark/{benchmark_id}/jobs/{job_id}:
|
/v1/eval/jobs/{job_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
|
@ -168,19 +168,19 @@ paths:
|
||||||
description: >-
|
description: >-
|
||||||
Get the EvalJob object for a given job id and benchmark id.
|
Get the EvalJob object for a given job id and benchmark id.
|
||||||
parameters:
|
parameters:
|
||||||
- name: benchmark_id
|
|
||||||
in: path
|
|
||||||
description: >-
|
|
||||||
The ID of the benchmark to run the evaluation on.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
- name: job_id
|
- name: job_id
|
||||||
in: path
|
in: path
|
||||||
description: The ID of the job to get the status of.
|
description: The ID of the job to get the status of.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
- name: benchmark_id
|
||||||
|
in: query
|
||||||
|
description: >-
|
||||||
|
The ID of the benchmark to run the evaluation on.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
delete:
|
delete:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
|
@ -199,19 +199,19 @@ paths:
|
||||||
- Scoring
|
- Scoring
|
||||||
description: Cancel a job.
|
description: Cancel a job.
|
||||||
parameters:
|
parameters:
|
||||||
- name: benchmark_id
|
|
||||||
in: path
|
|
||||||
description: >-
|
|
||||||
The ID of the benchmark to run the evaluation on.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
- name: job_id
|
- name: job_id
|
||||||
in: path
|
in: path
|
||||||
description: The ID of the job to cancel.
|
description: The ID of the job to cancel.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
- name: benchmark_id
|
||||||
|
in: query
|
||||||
|
description: >-
|
||||||
|
The ID of the benchmark to run the evaluation on.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
/v1/post-training/job/cancel:
|
/v1/post-training/job/cancel:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
@ -736,7 +736,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/EmbeddingsRequest'
|
$ref: '#/components/schemas/EmbeddingsRequest'
|
||||||
required: true
|
required: true
|
||||||
/v1/eval/benchmark/{benchmark_id}/jobs:
|
/v1/eval/jobs:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
|
@ -759,14 +759,7 @@ paths:
|
||||||
tags:
|
tags:
|
||||||
- Eval
|
- Eval
|
||||||
description: Run an evaluation on a benchmark.
|
description: Run an evaluation on a benchmark.
|
||||||
parameters:
|
parameters: []
|
||||||
- name: benchmark_id
|
|
||||||
in: path
|
|
||||||
description: >-
|
|
||||||
The ID of the benchmark to run the evaluation on.
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
|
@ -4410,6 +4403,10 @@ components:
|
||||||
EvaluateBenchmarkRequest:
|
EvaluateBenchmarkRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
benchmark_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of the benchmark to run the evaluation on.
|
||||||
candidate:
|
candidate:
|
||||||
$ref: '#/components/schemas/EvalCandidate'
|
$ref: '#/components/schemas/EvalCandidate'
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -4418,6 +4415,7 @@ components:
|
||||||
} - { "type": "agent", "config": {...}, }
|
} - { "type": "agent", "config": {...}, }
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
|
- benchmark_id
|
||||||
- candidate
|
- candidate
|
||||||
title: EvaluateBenchmarkRequest
|
title: EvaluateBenchmarkRequest
|
||||||
EvalJob:
|
EvalJob:
|
||||||
|
|
|
@ -94,7 +94,7 @@ class EvalJob(CommonJobFields):
|
||||||
class Eval(Protocol):
|
class Eval(Protocol):
|
||||||
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
|
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmark/{benchmark_id}/jobs", method="POST")
|
@webmethod(route="/eval/jobs", method="POST")
|
||||||
async def evaluate_benchmark(
|
async def evaluate_benchmark(
|
||||||
self,
|
self,
|
||||||
benchmark_id: str,
|
benchmark_id: str,
|
||||||
|
@ -132,7 +132,7 @@ class Eval(Protocol):
|
||||||
:return: EvaluateResponse object containing generations and scores
|
:return: EvaluateResponse object containing generations and scores
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmark/{benchmark_id}/jobs/{job_id}", method="GET")
|
@webmethod(route="/eval/jobs/{job_id}", method="GET")
|
||||||
async def get_job(self, benchmark_id: str, job_id: str) -> Optional[EvalJob]:
|
async def get_job(self, benchmark_id: str, job_id: str) -> Optional[EvalJob]:
|
||||||
"""Get the EvalJob object for a given job id and benchmark id.
|
"""Get the EvalJob object for a given job id and benchmark id.
|
||||||
|
|
||||||
|
@ -142,7 +142,7 @@ class Eval(Protocol):
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmark/{benchmark_id}/jobs/{job_id}", method="DELETE")
|
@webmethod(route="/eval/jobs/{job_id}", method="DELETE")
|
||||||
async def cancel_job(self, benchmark_id: str, job_id: str) -> None:
|
async def cancel_job(self, benchmark_id: str, job_id: str) -> None:
|
||||||
"""Cancel a job.
|
"""Cancel a job.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue