This commit is contained in:
Xi Yan 2025-02-12 20:48:05 -08:00
parent e07776fff6
commit ec721b3867
4 changed files with 695 additions and 334 deletions

View file

@ -67,8 +67,8 @@
"description": "",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
"name": "task_id",
"in": "query",
"required": true,
"schema": {
"type": "string"
@ -114,7 +114,7 @@
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/DeprecatedRegisterBenchmarkRequest"
"$ref": "#/components/schemas/DeprecatedRegisterEvalTaskRequest"
}
}
},
@ -613,7 +613,7 @@
}
}
},
"/v1/eval/tasks/{benchmark_id}/evaluations": {
"/v1/eval/benchmarks/{benchmark_id}/evaluations": {
"post": {
"responses": {
"200": {
@ -653,6 +653,47 @@
}
}
},
"/v1/eval/tasks/{task_id}/evaluations": {
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/EvaluateResponse"
}
}
}
}
},
"tags": [
"Eval"
],
"description": "",
"parameters": [
{
"name": "task_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/EvaluateRowsDeprecatedRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}": {
"get": {
"responses": {
@ -753,6 +794,43 @@
]
}
},
"/v1/eval/benchmarks/{benchmark_id}": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/Benchmark"
},
{
"type": "null"
}
]
}
}
}
}
},
"tags": [
"Benchmarks"
],
"description": "",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/datasets/{dataset_id}": {
"get": {
"responses": {
@ -811,43 +889,6 @@
]
}
},
"/v1/eval/tasks/{benchmark_id}": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/Benchmark"
},
{
"type": "null"
}
]
}
}
}
}
},
"tags": [
"Benchmarks"
],
"description": "",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/models/{model_id}": {
"get": {
"responses": {
@ -1431,7 +1472,7 @@
}
}
},
"/v1/eval/tasks/{benchmark_id}/jobs/{job_id}": {
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
"get": {
"responses": {
"200": {
@ -1505,7 +1546,83 @@
]
}
},
"/v1/eval/tasks/{benchmark_id}/jobs/{job_id}/result": {
"/v1/eval/tasks/{task_id}/jobs/{job_id}": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/JobStatus"
},
{
"type": "null"
}
]
}
}
}
}
},
"tags": [
"Eval"
],
"description": "",
"parameters": [
{
"name": "task_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "job_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
},
"delete": {
"responses": {
"200": {
"description": "OK"
}
},
"tags": [
"Eval"
],
"description": "",
"parameters": [
{
"name": "task_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "job_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": {
"get": {
"responses": {
"200": {
@ -1525,7 +1642,7 @@
"description": "",
"parameters": [
{
"name": "job_id",
"name": "benchmark_id",
"in": "path",
"required": true,
"schema": {
@ -1533,7 +1650,7 @@
}
},
{
"name": "benchmark_id",
"name": "job_id",
"in": "path",
"required": true,
"schema": {
@ -1543,6 +1660,88 @@
]
}
},
"/v1/eval/tasks/{task_id}/jobs/{job_id}/result": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/EvaluateResponse"
}
}
}
}
},
"tags": [
"Eval"
],
"description": "",
"parameters": [
{
"name": "task_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "job_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/eval/benchmarks": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBenchmarksResponse"
}
}
}
}
},
"tags": [
"Benchmarks"
],
"description": "",
"parameters": []
},
"post": {
"responses": {
"200": {
"description": "OK"
}
},
"tags": [
"Benchmarks"
],
"description": "",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/RegisterBenchmarkRequest"
}
}
},
"required": true
}
}
},
"/v1/datasets": {
"get": {
"responses": {
@ -1586,49 +1785,6 @@
}
}
},
"/v1/eval/tasks": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBenchmarksResponse"
}
}
}
}
},
"tags": [
"Benchmarks"
],
"description": "",
"parameters": []
},
"post": {
"responses": {
"200": {
"description": "OK"
}
},
"tags": [
"Benchmarks"
],
"description": "",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/RegisterBenchmarkRequest"
}
}
},
"required": true
}
}
},
"/v1/models": {
"get": {
"responses": {
@ -2204,7 +2360,7 @@
]
}
},
"/v1/eval/tasks/{benchmark_id}/jobs": {
"/v1/eval/benchmarks/{benchmark_id}/jobs": {
"post": {
"responses": {
"200": {
@ -2244,6 +2400,47 @@
}
}
},
"/v1/eval/tasks/{task_id}/jobs": {
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Job"
}
}
}
}
},
"tags": [
"Eval"
],
"description": "",
"parameters": [
{
"name": "task_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/RunEvalDeprecatedRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/safety/run-shield": {
"post": {
"responses": {
@ -2526,10 +2723,10 @@
"data"
]
},
"DeprecatedRegisterBenchmarkRequest": {
"DeprecatedRegisterEvalTaskRequest": {
"type": "object",
"properties": {
"benchmark_id": {
"task_id": {
"type": "string"
},
"dataset_id": {
@ -2575,7 +2772,7 @@
},
"additionalProperties": false,
"required": [
"benchmark_id",
"task_id",
"dataset_id",
"scoring_functions"
]
@ -4745,34 +4942,6 @@
"accuracy"
]
},
"AppBenchmarkConfig": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "app",
"default": "app"
},
"eval_candidate": {
"$ref": "#/components/schemas/EvalCandidate"
},
"scoring_params": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringFnParams"
}
},
"num_examples": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"eval_candidate",
"scoring_params"
]
},
"BasicScoringFnParams": {
"type": "object",
"properties": {
@ -4793,25 +4962,26 @@
"type"
]
},
"BenchmarkBenchmarkConfig": {
"BenchmarkConfig": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "benchmark",
"default": "benchmark"
},
"eval_candidate": {
"$ref": "#/components/schemas/EvalCandidate"
},
"scoring_params": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringFnParams"
}
},
"num_examples": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"type",
"eval_candidate"
"eval_candidate",
"scoring_params"
]
},
"EvalCandidate": {
@ -4831,23 +5001,6 @@
}
}
},
"BenchmarkConfig": {
"oneOf": [
{
"$ref": "#/components/schemas/BenchmarkBenchmarkConfig"
},
{
"$ref": "#/components/schemas/AppBenchmarkConfig"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"benchmark": "#/components/schemas/BenchmarkBenchmarkConfig",
"app": "#/components/schemas/AppBenchmarkConfig"
}
}
},
"LLMAsJudgeScoringFnParams": {
"type": "object",
"properties": {
@ -5108,6 +5261,54 @@
"aggregated_results"
]
},
"EvaluateRowsDeprecatedRequest": {
"type": "object",
"properties": {
"input_rows": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"scoring_functions": {
"type": "array",
"items": {
"type": "string"
}
},
"task_config": {
"$ref": "#/components/schemas/BenchmarkConfig"
}
},
"additionalProperties": false,
"required": [
"input_rows",
"scoring_functions",
"task_config"
]
},
"Session": {
"type": "object",
"properties": {
@ -7304,60 +7505,6 @@
"data"
]
},
"RegisterDatasetRequest": {
"type": "object",
"properties": {
"dataset_id": {
"type": "string"
},
"dataset_schema": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ParamType"
}
},
"url": {
"$ref": "#/components/schemas/URL"
},
"provider_dataset_id": {
"type": "string"
},
"provider_id": {
"type": "string"
},
"metadata": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"dataset_id",
"dataset_schema",
"url"
]
},
"RegisterBenchmarkRequest": {
"type": "object",
"properties": {
@ -7412,6 +7559,60 @@
"scoring_functions"
]
},
"RegisterDatasetRequest": {
"type": "object",
"properties": {
"dataset_id": {
"type": "string"
},
"dataset_schema": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ParamType"
}
},
"url": {
"$ref": "#/components/schemas/URL"
},
"provider_dataset_id": {
"type": "string"
},
"provider_id": {
"type": "string"
},
"metadata": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"dataset_id",
"dataset_schema",
"url"
]
},
"RegisterModelRequest": {
"type": "object",
"properties": {
@ -7623,6 +7824,18 @@
"job_id"
]
},
"RunEvalDeprecatedRequest": {
"type": "object",
"properties": {
"task_config": {
"$ref": "#/components/schemas/BenchmarkConfig"
}
},
"additionalProperties": false,
"required": [
"task_config"
]
},
"RunShieldRequest": {
"type": "object",
"properties": {
@ -8105,6 +8318,9 @@
{
"name": "BatchInference (Coming Soon)"
},
{
"name": "Benchmarks"
},
{
"name": "DatasetIO"
},
@ -8114,9 +8330,6 @@
{
"name": "Eval"
},
{
"name": "Benchmarks"
},
{
"name": "Inference",
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
@ -8168,10 +8381,10 @@
"tags": [
"Agents",
"BatchInference (Coming Soon)",
"Benchmarks",
"DatasetIO",
"Datasets",
"Eval",
"Benchmarks",
"Inference",
"Inspect",
"Models",

View file

@ -25,8 +25,8 @@ paths:
- Benchmarks
description: ''
parameters:
- name: benchmark_id
in: path
- name: task_id
in: query
required: true
schema:
type: string
@ -57,7 +57,7 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/DeprecatedRegisterBenchmarkRequest'
$ref: '#/components/schemas/DeprecatedRegisterEvalTaskRequest'
required: true
deprecated: true
/v1/datasetio/rows:
@ -372,7 +372,7 @@ paths:
schema:
$ref: '#/components/schemas/EmbeddingsRequest'
required: true
/v1/eval/tasks/{benchmark_id}/evaluations:
/v1/eval/benchmarks/{benchmark_id}/evaluations:
post:
responses:
'200':
@ -396,6 +396,31 @@ paths:
schema:
$ref: '#/components/schemas/EvaluateRowsRequest'
required: true
/v1/eval/tasks/{task_id}/evaluations:
post:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/EvaluateResponse'
tags:
- Eval
description: ''
parameters:
- name: task_id
in: path
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/EvaluateRowsDeprecatedRequest'
required: true
deprecated: true
/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}:
get:
responses:
@ -457,6 +482,26 @@ paths:
required: true
schema:
type: string
/v1/eval/benchmarks/{benchmark_id}:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/Benchmark'
- type: 'null'
tags:
- Benchmarks
description: ''
parameters:
- name: benchmark_id
in: path
required: true
schema:
type: string
/v1/datasets/{dataset_id}:
get:
responses:
@ -490,26 +535,6 @@ paths:
required: true
schema:
type: string
/v1/eval/tasks/{benchmark_id}:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/Benchmark'
- type: 'null'
tags:
- Benchmarks
description: ''
parameters:
- name: benchmark_id
in: path
required: true
schema:
type: string
/v1/models/{model_id}:
get:
responses:
@ -852,7 +877,7 @@ paths:
schema:
$ref: '#/components/schemas/InvokeToolRequest'
required: true
/v1/eval/tasks/{benchmark_id}/jobs/{job_id}:
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
get:
responses:
'200':
@ -895,7 +920,52 @@ paths:
required: true
schema:
type: string
/v1/eval/tasks/{benchmark_id}/jobs/{job_id}/result:
/v1/eval/tasks/{task_id}/jobs/{job_id}:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/JobStatus'
- type: 'null'
tags:
- Eval
description: ''
parameters:
- name: task_id
in: path
required: true
schema:
type: string
- name: job_id
in: path
required: true
schema:
type: string
deprecated: true
delete:
responses:
'200':
description: OK
tags:
- Eval
description: ''
parameters:
- name: task_id
in: path
required: true
schema:
type: string
- name: job_id
in: path
required: true
schema:
type: string
deprecated: true
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
get:
responses:
'200':
@ -908,16 +978,67 @@ paths:
- Eval
description: ''
parameters:
- name: job_id
in: path
required: true
schema:
type: string
- name: benchmark_id
in: path
required: true
schema:
type: string
- name: job_id
in: path
required: true
schema:
type: string
/v1/eval/tasks/{task_id}/jobs/{job_id}/result:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/EvaluateResponse'
tags:
- Eval
description: ''
parameters:
- name: task_id
in: path
required: true
schema:
type: string
- name: job_id
in: path
required: true
schema:
type: string
deprecated: true
/v1/eval/benchmarks:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/ListBenchmarksResponse'
tags:
- Benchmarks
description: ''
parameters: []
post:
responses:
'200':
description: OK
tags:
- Benchmarks
description: ''
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterBenchmarkRequest'
required: true
/v1/datasets:
get:
responses:
@ -945,33 +1066,6 @@ paths:
schema:
$ref: '#/components/schemas/RegisterDatasetRequest'
required: true
/v1/eval/tasks:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/ListBenchmarksResponse'
tags:
- Benchmarks
description: ''
parameters: []
post:
responses:
'200':
description: OK
tags:
- Benchmarks
description: ''
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RegisterBenchmarkRequest'
required: true
/v1/models:
get:
responses:
@ -1328,7 +1422,7 @@ paths:
type: array
items:
type: string
/v1/eval/tasks/{benchmark_id}/jobs:
/v1/eval/benchmarks/{benchmark_id}/jobs:
post:
responses:
'200':
@ -1352,6 +1446,31 @@ paths:
schema:
$ref: '#/components/schemas/RunEvalRequest'
required: true
/v1/eval/tasks/{task_id}/jobs:
post:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/Job'
tags:
- Eval
description: ''
parameters:
- name: task_id
in: path
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/RunEvalDeprecatedRequest'
required: true
deprecated: true
/v1/safety/run-shield:
post:
responses:
@ -1527,10 +1646,10 @@ components:
additionalProperties: false
required:
- data
DeprecatedRegisterBenchmarkRequest:
DeprecatedRegisterEvalTaskRequest:
type: object
properties:
benchmark_id:
task_id:
type: string
dataset_id:
type: string
@ -1554,7 +1673,7 @@ components:
- type: object
additionalProperties: false
required:
- benchmark_id
- task_id
- dataset_id
- scoring_functions
AppendRowsRequest:
@ -3063,26 +3182,6 @@ components:
- median
- categorical_count
- accuracy
AppBenchmarkConfig:
type: object
properties:
type:
type: string
const: app
default: app
eval_candidate:
$ref: '#/components/schemas/EvalCandidate'
scoring_params:
type: object
additionalProperties:
$ref: '#/components/schemas/ScoringFnParams'
num_examples:
type: integer
additionalProperties: false
required:
- type
- eval_candidate
- scoring_params
BasicScoringFnParams:
type: object
properties:
@ -3097,21 +3196,21 @@ components:
additionalProperties: false
required:
- type
BenchmarkBenchmarkConfig:
BenchmarkConfig:
type: object
properties:
type:
type: string
const: benchmark
default: benchmark
eval_candidate:
$ref: '#/components/schemas/EvalCandidate'
scoring_params:
type: object
additionalProperties:
$ref: '#/components/schemas/ScoringFnParams'
num_examples:
type: integer
additionalProperties: false
required:
- type
- eval_candidate
- scoring_params
EvalCandidate:
oneOf:
- $ref: '#/components/schemas/ModelCandidate'
@ -3121,15 +3220,6 @@ components:
mapping:
model: '#/components/schemas/ModelCandidate'
agent: '#/components/schemas/AgentCandidate'
BenchmarkConfig:
oneOf:
- $ref: '#/components/schemas/BenchmarkBenchmarkConfig'
- $ref: '#/components/schemas/AppBenchmarkConfig'
discriminator:
propertyName: type
mapping:
benchmark: '#/components/schemas/BenchmarkBenchmarkConfig'
app: '#/components/schemas/AppBenchmarkConfig'
LLMAsJudgeScoringFnParams:
type: object
properties:
@ -3278,6 +3368,32 @@ components:
required:
- score_rows
- aggregated_results
EvaluateRowsDeprecatedRequest:
type: object
properties:
input_rows:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
scoring_functions:
type: array
items:
type: string
task_config:
$ref: '#/components/schemas/BenchmarkConfig'
additionalProperties: false
required:
- input_rows
- scoring_functions
- task_config
Session:
type: object
properties:
@ -4645,36 +4761,6 @@ components:
additionalProperties: false
required:
- data
RegisterDatasetRequest:
type: object
properties:
dataset_id:
type: string
dataset_schema:
type: object
additionalProperties:
$ref: '#/components/schemas/ParamType'
url:
$ref: '#/components/schemas/URL'
provider_dataset_id:
type: string
provider_id:
type: string
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- dataset_id
- dataset_schema
- url
RegisterBenchmarkRequest:
type: object
properties:
@ -4705,6 +4791,36 @@ components:
- benchmark_id
- dataset_id
- scoring_functions
RegisterDatasetRequest:
type: object
properties:
dataset_id:
type: string
dataset_schema:
type: object
additionalProperties:
$ref: '#/components/schemas/ParamType'
url:
$ref: '#/components/schemas/URL'
provider_dataset_id:
type: string
provider_id:
type: string
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- dataset_id
- dataset_schema
- url
RegisterModelRequest:
type: object
properties:
@ -4827,6 +4943,14 @@ components:
additionalProperties: false
required:
- job_id
RunEvalDeprecatedRequest:
type: object
properties:
task_config:
$ref: '#/components/schemas/BenchmarkConfig'
additionalProperties: false
required:
- task_config
RunShieldRequest:
type: object
properties:
@ -5125,10 +5249,10 @@ tags:
x-displayName: >-
Agents API for creating and interacting with agentic systems.
- name: BatchInference (Coming Soon)
- name: Benchmarks
- name: DatasetIO
- name: Datasets
- name: Eval
- name: Benchmarks
- name: Inference
description: >-
This API provides the raw interface to the underlying models. Two kinds of models
@ -5159,10 +5283,10 @@ x-tagGroups:
tags:
- Agents
- BatchInference (Coming Soon)
- Benchmarks
- DatasetIO
- Datasets
- Eval
- Benchmarks
- Inference
- Inspect
- Models

View file

@ -83,3 +83,28 @@ class Eval(Protocol):
# GET route returning the EvaluateResponse for job `job_id` under benchmark
# `benchmark_id`. Both identifiers are taken from the URL path.
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: ...
# Legacy POST route keyed by `task_id`; the API also exposes
# POST /eval/benchmarks/{benchmark_id}/jobs. The generated OpenAPI spec lists
# this path with `deprecated: true` and a body carrying only `task_config`.
# NOTE(review): the deprecated flag is presumably derived from the
# `_DEPRECATED` name suffix -- confirm against the spec generator.
@webmethod(route="/eval/tasks/{task_id}/jobs", method="POST")
async def run_eval_DEPRECATED(
self,
task_id: str,
task_config: BenchmarkConfig,
) -> Job: ...
# Legacy POST route keyed by `task_id`; the API also exposes
# POST /eval/benchmarks/{benchmark_id}/evaluations. The generated OpenAPI spec
# lists this path with `deprecated: true`; its request body requires
# `input_rows`, `scoring_functions` and `task_config`, and the response is an
# EvaluateResponse.
@webmethod(route="/eval/tasks/{task_id}/evaluations", method="POST")
async def evaluate_rows_DEPRECATED(
self,
task_id: str,
input_rows: List[Dict[str, Any]],
scoring_functions: List[str],
task_config: BenchmarkConfig,
) -> EvaluateResponse: ...
# Legacy GET route keyed by `task_id`; the API also exposes
# GET /eval/benchmarks/{benchmark_id}/jobs/{job_id}. Returns a JobStatus or
# None (the spec models the response as JobStatus | null) and is listed with
# `deprecated: true` in the generated OpenAPI spec.
@webmethod(route="/eval/tasks/{task_id}/jobs/{job_id}", method="GET")
async def job_status_DEPRECATED(self, task_id: str, job_id: str) -> Optional[JobStatus]: ...
# Legacy DELETE route keyed by `task_id`, sharing its path with
# job_status_DEPRECATED (GET). Returns nothing; the generated OpenAPI spec
# lists it with `deprecated: true` and a bare 200 response.
@webmethod(route="/eval/tasks/{task_id}/jobs/{job_id}", method="DELETE")
async def job_cancel_DEPRECATED(self, task_id: str, job_id: str) -> None: ...
# Legacy GET route keyed by `task_id`; the API also exposes
# GET /eval/benchmarks/{benchmark_id}/jobs/{job_id}/result. Returns an
# EvaluateResponse and is listed with `deprecated: true` in the generated
# OpenAPI spec.
@webmethod(route="/eval/tasks/{task_id}/jobs/{job_id}/result", method="GET")
async def job_result_DEPRECATED(self, task_id: str, job_id: str) -> EvaluateResponse: ...

View file

@ -9,7 +9,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional
from llama_stack.apis.common.content_types import InterleavedContent, URL
from llama_stack.apis.datasetio import DatasetIO, PaginatedRowsResult
from llama_stack.apis.eval import (
AppBenchmarkConfig,
BenchmarkConfig,
Eval,
EvaluateResponse,
@ -348,7 +347,7 @@ class EvalRouter(Eval):
async def run_eval(
self,
benchmark_id: str,
task_config: AppBenchmarkConfig,
task_config: BenchmarkConfig,
) -> Job:
return await self.routing_table.get_provider_impl(benchmark_id).run_eval(
benchmark_id=benchmark_id,