mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-10 04:08:31 +00:00
update
This commit is contained in:
parent
e07776fff6
commit
ec721b3867
4 changed files with 695 additions and 334 deletions
619
docs/_static/llama-stack-spec.html
vendored
619
docs/_static/llama-stack-spec.html
vendored
|
@ -67,8 +67,8 @@
|
|||
"description": "",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"name": "task_id",
|
||||
"in": "query",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -114,7 +114,7 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/DeprecatedRegisterBenchmarkRequest"
|
||||
"$ref": "#/components/schemas/DeprecatedRegisterEvalTaskRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -613,7 +613,7 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/tasks/{benchmark_id}/evaluations": {
|
||||
"/v1/eval/benchmarks/{benchmark_id}/evaluations": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
@ -653,6 +653,47 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/tasks/{task_id}/evaluations": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/EvaluateResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "task_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/EvaluateRowsDeprecatedRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -753,6 +794,43 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/Benchmark"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Benchmarks"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/datasets/{dataset_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -811,43 +889,6 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1/eval/tasks/{benchmark_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/Benchmark"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Benchmarks"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/models/{model_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -1431,7 +1472,7 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/tasks/{benchmark_id}/jobs/{job_id}": {
|
||||
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
@ -1505,7 +1546,83 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1/eval/tasks/{benchmark_id}/jobs/{job_id}/result": {
|
||||
"/v1/eval/tasks/{task_id}/jobs/{job_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/JobStatus"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "task_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": true
|
||||
},
|
||||
"delete": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "task_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
@ -1525,7 +1642,7 @@
|
|||
"description": "",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "job_id",
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
|
@ -1533,7 +1650,7 @@
|
|||
}
|
||||
},
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
|
@ -1543,6 +1660,88 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1/eval/tasks/{task_id}/jobs/{job_id}/result": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/EvaluateResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "task_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ListBenchmarksResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Benchmarks"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": []
|
||||
},
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Benchmarks"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RegisterBenchmarkRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/datasets": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -1586,49 +1785,6 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/tasks": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ListBenchmarksResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Benchmarks"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": []
|
||||
},
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Benchmarks"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RegisterBenchmarkRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/models": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -2204,7 +2360,7 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1/eval/tasks/{benchmark_id}/jobs": {
|
||||
"/v1/eval/benchmarks/{benchmark_id}/jobs": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
@ -2244,6 +2400,47 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/tasks/{task_id}/jobs": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Job"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "task_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RunEvalDeprecatedRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/safety/run-shield": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -2526,10 +2723,10 @@
|
|||
"data"
|
||||
]
|
||||
},
|
||||
"DeprecatedRegisterBenchmarkRequest": {
|
||||
"DeprecatedRegisterEvalTaskRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"benchmark_id": {
|
||||
"task_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"dataset_id": {
|
||||
|
@ -2575,7 +2772,7 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"benchmark_id",
|
||||
"task_id",
|
||||
"dataset_id",
|
||||
"scoring_functions"
|
||||
]
|
||||
|
@ -4745,34 +4942,6 @@
|
|||
"accuracy"
|
||||
]
|
||||
},
|
||||
"AppBenchmarkConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "app",
|
||||
"default": "app"
|
||||
},
|
||||
"eval_candidate": {
|
||||
"$ref": "#/components/schemas/EvalCandidate"
|
||||
},
|
||||
"scoring_params": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
}
|
||||
},
|
||||
"num_examples": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"eval_candidate",
|
||||
"scoring_params"
|
||||
]
|
||||
},
|
||||
"BasicScoringFnParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -4793,25 +4962,26 @@
|
|||
"type"
|
||||
]
|
||||
},
|
||||
"BenchmarkBenchmarkConfig": {
|
||||
"BenchmarkConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "benchmark",
|
||||
"default": "benchmark"
|
||||
},
|
||||
"eval_candidate": {
|
||||
"$ref": "#/components/schemas/EvalCandidate"
|
||||
},
|
||||
"scoring_params": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
}
|
||||
},
|
||||
"num_examples": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"eval_candidate"
|
||||
"eval_candidate",
|
||||
"scoring_params"
|
||||
]
|
||||
},
|
||||
"EvalCandidate": {
|
||||
|
@ -4831,23 +5001,6 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"BenchmarkConfig": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/BenchmarkBenchmarkConfig"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/AppBenchmarkConfig"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"benchmark": "#/components/schemas/BenchmarkBenchmarkConfig",
|
||||
"app": "#/components/schemas/AppBenchmarkConfig"
|
||||
}
|
||||
}
|
||||
},
|
||||
"LLMAsJudgeScoringFnParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -5108,6 +5261,54 @@
|
|||
"aggregated_results"
|
||||
]
|
||||
},
|
||||
"EvaluateRowsDeprecatedRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_rows": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"scoring_functions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"task_config": {
|
||||
"$ref": "#/components/schemas/BenchmarkConfig"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input_rows",
|
||||
"scoring_functions",
|
||||
"task_config"
|
||||
]
|
||||
},
|
||||
"Session": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -7304,60 +7505,6 @@
|
|||
"data"
|
||||
]
|
||||
},
|
||||
"RegisterDatasetRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dataset_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"dataset_schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ParamType"
|
||||
}
|
||||
},
|
||||
"url": {
|
||||
"$ref": "#/components/schemas/URL"
|
||||
},
|
||||
"provider_dataset_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"provider_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"dataset_id",
|
||||
"dataset_schema",
|
||||
"url"
|
||||
]
|
||||
},
|
||||
"RegisterBenchmarkRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -7412,6 +7559,60 @@
|
|||
"scoring_functions"
|
||||
]
|
||||
},
|
||||
"RegisterDatasetRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dataset_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"dataset_schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ParamType"
|
||||
}
|
||||
},
|
||||
"url": {
|
||||
"$ref": "#/components/schemas/URL"
|
||||
},
|
||||
"provider_dataset_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"provider_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"dataset_id",
|
||||
"dataset_schema",
|
||||
"url"
|
||||
]
|
||||
},
|
||||
"RegisterModelRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -7623,6 +7824,18 @@
|
|||
"job_id"
|
||||
]
|
||||
},
|
||||
"RunEvalDeprecatedRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"task_config": {
|
||||
"$ref": "#/components/schemas/BenchmarkConfig"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"task_config"
|
||||
]
|
||||
},
|
||||
"RunShieldRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -8105,6 +8318,9 @@
|
|||
{
|
||||
"name": "BatchInference (Coming Soon)"
|
||||
},
|
||||
{
|
||||
"name": "Benchmarks"
|
||||
},
|
||||
{
|
||||
"name": "DatasetIO"
|
||||
},
|
||||
|
@ -8114,9 +8330,6 @@
|
|||
{
|
||||
"name": "Eval"
|
||||
},
|
||||
{
|
||||
"name": "Benchmarks"
|
||||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
|
@ -8168,10 +8381,10 @@
|
|||
"tags": [
|
||||
"Agents",
|
||||
"BatchInference (Coming Soon)",
|
||||
"Benchmarks",
|
||||
"DatasetIO",
|
||||
"Datasets",
|
||||
"Eval",
|
||||
"Benchmarks",
|
||||
"Inference",
|
||||
"Inspect",
|
||||
"Models",
|
||||
|
|
382
docs/_static/llama-stack-spec.yaml
vendored
382
docs/_static/llama-stack-spec.yaml
vendored
|
@ -25,8 +25,8 @@ paths:
|
|||
- Benchmarks
|
||||
description: ''
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
- name: task_id
|
||||
in: query
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -57,7 +57,7 @@ paths:
|
|||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/DeprecatedRegisterBenchmarkRequest'
|
||||
$ref: '#/components/schemas/DeprecatedRegisterEvalTaskRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/datasetio/rows:
|
||||
|
@ -372,7 +372,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/EmbeddingsRequest'
|
||||
required: true
|
||||
/v1/eval/tasks/{benchmark_id}/evaluations:
|
||||
/v1/eval/benchmarks/{benchmark_id}/evaluations:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -396,6 +396,31 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/EvaluateRowsRequest'
|
||||
required: true
|
||||
/v1/eval/tasks/{task_id}/evaluations:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EvaluateResponse'
|
||||
tags:
|
||||
- Eval
|
||||
description: ''
|
||||
parameters:
|
||||
- name: task_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EvaluateRowsDeprecatedRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}:
|
||||
get:
|
||||
responses:
|
||||
|
@ -457,6 +482,26 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/eval/benchmarks/{benchmark_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/Benchmark'
|
||||
- type: 'null'
|
||||
tags:
|
||||
- Benchmarks
|
||||
description: ''
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/datasets/{dataset_id}:
|
||||
get:
|
||||
responses:
|
||||
|
@ -490,26 +535,6 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/eval/tasks/{benchmark_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/Benchmark'
|
||||
- type: 'null'
|
||||
tags:
|
||||
- Benchmarks
|
||||
description: ''
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/models/{model_id}:
|
||||
get:
|
||||
responses:
|
||||
|
@ -852,7 +877,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/InvokeToolRequest'
|
||||
required: true
|
||||
/v1/eval/tasks/{benchmark_id}/jobs/{job_id}:
|
||||
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -895,7 +920,52 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/eval/tasks/{benchmark_id}/jobs/{job_id}/result:
|
||||
/v1/eval/tasks/{task_id}/jobs/{job_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/JobStatus'
|
||||
- type: 'null'
|
||||
tags:
|
||||
- Eval
|
||||
description: ''
|
||||
parameters:
|
||||
- name: task_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: true
|
||||
delete:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
tags:
|
||||
- Eval
|
||||
description: ''
|
||||
parameters:
|
||||
- name: task_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: true
|
||||
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -908,16 +978,67 @@ paths:
|
|||
- Eval
|
||||
description: ''
|
||||
parameters:
|
||||
- name: job_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/eval/tasks/{task_id}/jobs/{job_id}/result:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EvaluateResponse'
|
||||
tags:
|
||||
- Eval
|
||||
description: ''
|
||||
parameters:
|
||||
- name: task_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: true
|
||||
/v1/eval/benchmarks:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListBenchmarksResponse'
|
||||
tags:
|
||||
- Benchmarks
|
||||
description: ''
|
||||
parameters: []
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
tags:
|
||||
- Benchmarks
|
||||
description: ''
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RegisterBenchmarkRequest'
|
||||
required: true
|
||||
/v1/datasets:
|
||||
get:
|
||||
responses:
|
||||
|
@ -945,33 +1066,6 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/RegisterDatasetRequest'
|
||||
required: true
|
||||
/v1/eval/tasks:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListBenchmarksResponse'
|
||||
tags:
|
||||
- Benchmarks
|
||||
description: ''
|
||||
parameters: []
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
tags:
|
||||
- Benchmarks
|
||||
description: ''
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RegisterBenchmarkRequest'
|
||||
required: true
|
||||
/v1/models:
|
||||
get:
|
||||
responses:
|
||||
|
@ -1328,7 +1422,7 @@ paths:
|
|||
type: array
|
||||
items:
|
||||
type: string
|
||||
/v1/eval/tasks/{benchmark_id}/jobs:
|
||||
/v1/eval/benchmarks/{benchmark_id}/jobs:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -1352,6 +1446,31 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/RunEvalRequest'
|
||||
required: true
|
||||
/v1/eval/tasks/{task_id}/jobs:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
tags:
|
||||
- Eval
|
||||
description: ''
|
||||
parameters:
|
||||
- name: task_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RunEvalDeprecatedRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/safety/run-shield:
|
||||
post:
|
||||
responses:
|
||||
|
@ -1527,10 +1646,10 @@ components:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
DeprecatedRegisterBenchmarkRequest:
|
||||
DeprecatedRegisterEvalTaskRequest:
|
||||
type: object
|
||||
properties:
|
||||
benchmark_id:
|
||||
task_id:
|
||||
type: string
|
||||
dataset_id:
|
||||
type: string
|
||||
|
@ -1554,7 +1673,7 @@ components:
|
|||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- benchmark_id
|
||||
- task_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
AppendRowsRequest:
|
||||
|
@ -3063,26 +3182,6 @@ components:
|
|||
- median
|
||||
- categorical_count
|
||||
- accuracy
|
||||
AppBenchmarkConfig:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: app
|
||||
default: app
|
||||
eval_candidate:
|
||||
$ref: '#/components/schemas/EvalCandidate'
|
||||
scoring_params:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringFnParams'
|
||||
num_examples:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- eval_candidate
|
||||
- scoring_params
|
||||
BasicScoringFnParams:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -3097,21 +3196,21 @@ components:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
BenchmarkBenchmarkConfig:
|
||||
BenchmarkConfig:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: benchmark
|
||||
default: benchmark
|
||||
eval_candidate:
|
||||
$ref: '#/components/schemas/EvalCandidate'
|
||||
scoring_params:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringFnParams'
|
||||
num_examples:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- eval_candidate
|
||||
- scoring_params
|
||||
EvalCandidate:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ModelCandidate'
|
||||
|
@ -3121,15 +3220,6 @@ components:
|
|||
mapping:
|
||||
model: '#/components/schemas/ModelCandidate'
|
||||
agent: '#/components/schemas/AgentCandidate'
|
||||
BenchmarkConfig:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/BenchmarkBenchmarkConfig'
|
||||
- $ref: '#/components/schemas/AppBenchmarkConfig'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
benchmark: '#/components/schemas/BenchmarkBenchmarkConfig'
|
||||
app: '#/components/schemas/AppBenchmarkConfig'
|
||||
LLMAsJudgeScoringFnParams:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -3278,6 +3368,32 @@ components:
|
|||
required:
|
||||
- score_rows
|
||||
- aggregated_results
|
||||
EvaluateRowsDeprecatedRequest:
|
||||
type: object
|
||||
properties:
|
||||
input_rows:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
scoring_functions:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
task_config:
|
||||
$ref: '#/components/schemas/BenchmarkConfig'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input_rows
|
||||
- scoring_functions
|
||||
- task_config
|
||||
Session:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4645,36 +4761,6 @@ components:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
RegisterDatasetRequest:
|
||||
type: object
|
||||
properties:
|
||||
dataset_id:
|
||||
type: string
|
||||
dataset_schema:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ParamType'
|
||||
url:
|
||||
$ref: '#/components/schemas/URL'
|
||||
provider_dataset_id:
|
||||
type: string
|
||||
provider_id:
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- dataset_id
|
||||
- dataset_schema
|
||||
- url
|
||||
RegisterBenchmarkRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4705,6 +4791,36 @@ components:
|
|||
- benchmark_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
RegisterDatasetRequest:
|
||||
type: object
|
||||
properties:
|
||||
dataset_id:
|
||||
type: string
|
||||
dataset_schema:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ParamType'
|
||||
url:
|
||||
$ref: '#/components/schemas/URL'
|
||||
provider_dataset_id:
|
||||
type: string
|
||||
provider_id:
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- dataset_id
|
||||
- dataset_schema
|
||||
- url
|
||||
RegisterModelRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4827,6 +4943,14 @@ components:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- job_id
|
||||
RunEvalDeprecatedRequest:
|
||||
type: object
|
||||
properties:
|
||||
task_config:
|
||||
$ref: '#/components/schemas/BenchmarkConfig'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- task_config
|
||||
RunShieldRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5125,10 +5249,10 @@ tags:
|
|||
x-displayName: >-
|
||||
Agents API for creating and interacting with agentic systems.
|
||||
- name: BatchInference (Coming Soon)
|
||||
- name: Benchmarks
|
||||
- name: DatasetIO
|
||||
- name: Datasets
|
||||
- name: Eval
|
||||
- name: Benchmarks
|
||||
- name: Inference
|
||||
description: >-
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
|
@ -5159,10 +5283,10 @@ x-tagGroups:
|
|||
tags:
|
||||
- Agents
|
||||
- BatchInference (Coming Soon)
|
||||
- Benchmarks
|
||||
- DatasetIO
|
||||
- Datasets
|
||||
- Eval
|
||||
- Benchmarks
|
||||
- Inference
|
||||
- Inspect
|
||||
- Models
|
||||
|
|
|
@ -83,3 +83,28 @@ class Eval(Protocol):
|
|||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
|
||||
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: ...
|
||||
|
||||
@webmethod(route="/eval/tasks/{task_id}/jobs", method="POST")
|
||||
async def run_eval_DEPRECATED(
|
||||
self,
|
||||
task_id: str,
|
||||
task_config: BenchmarkConfig,
|
||||
) -> Job: ...
|
||||
|
||||
@webmethod(route="/eval/tasks/{task_id}/evaluations", method="POST")
|
||||
async def evaluate_rows_DEPRECATED(
|
||||
self,
|
||||
task_id: str,
|
||||
input_rows: List[Dict[str, Any]],
|
||||
scoring_functions: List[str],
|
||||
task_config: BenchmarkConfig,
|
||||
) -> EvaluateResponse: ...
|
||||
|
||||
@webmethod(route="/eval/tasks/{task_id}/jobs/{job_id}", method="GET")
|
||||
async def job_status_DEPRECATED(self, task_id: str, job_id: str) -> Optional[JobStatus]: ...
|
||||
|
||||
@webmethod(route="/eval/tasks/{task_id}/jobs/{job_id}", method="DELETE")
|
||||
async def job_cancel_DEPRECATED(self, task_id: str, job_id: str) -> None: ...
|
||||
|
||||
@webmethod(route="/eval/tasks/{task_id}/jobs/{job_id}/result", method="GET")
|
||||
async def job_result_DEPRECATED(self, task_id: str, job_id: str) -> EvaluateResponse: ...
|
||||
|
|
|
@ -9,7 +9,6 @@ from typing import Any, AsyncGenerator, Dict, List, Optional
|
|||
from llama_stack.apis.common.content_types import InterleavedContent, URL
|
||||
from llama_stack.apis.datasetio import DatasetIO, PaginatedRowsResult
|
||||
from llama_stack.apis.eval import (
|
||||
AppBenchmarkConfig,
|
||||
BenchmarkConfig,
|
||||
Eval,
|
||||
EvaluateResponse,
|
||||
|
@ -348,7 +347,7 @@ class EvalRouter(Eval):
|
|||
async def run_eval(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
task_config: AppBenchmarkConfig,
|
||||
task_config: BenchmarkConfig,
|
||||
) -> Job:
|
||||
return await self.routing_table.get_provider_impl(benchmark_id).run_eval(
|
||||
benchmark_id=benchmark_id,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue