forked from phoenix-oss/llama-stack-mirror
docs: api documentation for agents/eval/scoring/datasets (#1400)
# What does this PR do?
- add some docs to OpenAPI for agents/eval/scoring/datasetio
[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])
## Test Plan
- read
[//]: # (## Documentation)
This commit is contained in:
parent
0d18274d34
commit
3d9331840e
6 changed files with 586 additions and 137 deletions
291
docs/_static/llama-stack-spec.html
vendored
291
docs/_static/llama-stack-spec.html
vendored
|
@ -69,11 +69,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"DatasetIO"
|
"DatasetIO"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Get a paginated list of rows from a dataset.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "dataset_id",
|
"name": "dataset_id",
|
||||||
"in": "query",
|
"in": "query",
|
||||||
|
"description": "The ID of the dataset to get the rows from.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -82,6 +83,7 @@
|
||||||
{
|
{
|
||||||
"name": "rows_in_page",
|
"name": "rows_in_page",
|
||||||
"in": "query",
|
"in": "query",
|
||||||
|
"description": "The number of rows to get per page.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
|
@ -90,6 +92,7 @@
|
||||||
{
|
{
|
||||||
"name": "page_token",
|
"name": "page_token",
|
||||||
"in": "query",
|
"in": "query",
|
||||||
|
"description": "The token to get the next page of rows.",
|
||||||
"required": false,
|
"required": false,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -98,6 +101,7 @@
|
||||||
{
|
{
|
||||||
"name": "filter_condition",
|
"name": "filter_condition",
|
||||||
"in": "query",
|
"in": "query",
|
||||||
|
"description": "(Optional) A condition to filter the rows by.",
|
||||||
"required": false,
|
"required": false,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -362,7 +366,7 @@
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "OK",
|
"description": "An AgentCreateResponse with the agent ID.",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -387,7 +391,7 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Create an agent with the given configuration.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
@ -405,7 +409,7 @@
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "OK",
|
"description": "An AgentSessionCreateResponse.",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -430,11 +434,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Create a new session for an agent.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "agent_id",
|
"name": "agent_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the agent to create the session for.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -457,7 +462,7 @@
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
|
"description": "If stream=False, returns a Turn object. If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk.",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -487,11 +492,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Create a new turn for an agent.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "agent_id",
|
"name": "agent_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the agent to create the turn for.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -500,6 +506,7 @@
|
||||||
{
|
{
|
||||||
"name": "session_id",
|
"name": "session_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the session to create the turn for.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -623,11 +630,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Delete an agent by its ID.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "agent_id",
|
"name": "agent_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the agent to delete.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -665,11 +673,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Retrieve an agent session by its ID.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "session_id",
|
"name": "session_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the session to get.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -678,6 +687,7 @@
|
||||||
{
|
{
|
||||||
"name": "agent_id",
|
"name": "agent_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the agent to get the session for.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -686,6 +696,7 @@
|
||||||
{
|
{
|
||||||
"name": "turn_ids",
|
"name": "turn_ids",
|
||||||
"in": "query",
|
"in": "query",
|
||||||
|
"description": "(Optional) List of turn IDs to filter the session by.",
|
||||||
"required": false,
|
"required": false,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
|
@ -717,11 +728,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Delete an agent session by its ID.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "session_id",
|
"name": "session_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the session to delete.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -730,6 +742,7 @@
|
||||||
{
|
{
|
||||||
"name": "agent_id",
|
"name": "agent_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the agent to delete the session for.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -887,7 +900,7 @@
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "OK",
|
"description": "EvaluateResponse object containing generations and scores",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -912,11 +925,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Eval"
|
"Eval"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Evaluate a list of rows on a benchmark.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "benchmark_id",
|
"name": "benchmark_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the benchmark to run the evaluation on.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -939,7 +953,7 @@
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "OK",
|
"description": "An AgentStepResponse.",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -964,11 +978,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Retrieve an agent step by its ID.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "agent_id",
|
"name": "agent_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the agent to get the step for.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -977,6 +992,7 @@
|
||||||
{
|
{
|
||||||
"name": "session_id",
|
"name": "session_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the session to get the step for.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -985,6 +1001,7 @@
|
||||||
{
|
{
|
||||||
"name": "turn_id",
|
"name": "turn_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the turn to get the step for.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -993,6 +1010,7 @@
|
||||||
{
|
{
|
||||||
"name": "step_id",
|
"name": "step_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the step to get.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -1005,7 +1023,7 @@
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "OK",
|
"description": "A Turn.",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -1030,11 +1048,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents"
|
"Agents"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Retrieve an agent turn by its ID.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "agent_id",
|
"name": "agent_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the agent to get the turn for.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -1043,6 +1062,7 @@
|
||||||
{
|
{
|
||||||
"name": "session_id",
|
"name": "session_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the session to get the turn for.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -1051,6 +1071,7 @@
|
||||||
{
|
{
|
||||||
"name": "turn_id",
|
"name": "turn_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the turn to get.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -2105,7 +2126,7 @@
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "OK",
|
"description": "The status of the evaluation job.",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -2137,11 +2158,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Eval"
|
"Eval"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Get the status of a job.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "benchmark_id",
|
"name": "benchmark_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the benchmark to run the evaluation on.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -2150,6 +2172,7 @@
|
||||||
{
|
{
|
||||||
"name": "job_id",
|
"name": "job_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the job to get the status of.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -2178,11 +2201,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Eval"
|
"Eval"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Cancel a job.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "benchmark_id",
|
"name": "benchmark_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the benchmark to run the evaluation on.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -2191,6 +2215,7 @@
|
||||||
{
|
{
|
||||||
"name": "job_id",
|
"name": "job_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the job to cancel.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -2203,7 +2228,7 @@
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "OK",
|
"description": "The result of the job.",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -2228,11 +2253,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Eval"
|
"Eval"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Get the result of a job.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "benchmark_id",
|
"name": "benchmark_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the benchmark to run the evaluation on.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -2241,6 +2267,7 @@
|
||||||
{
|
{
|
||||||
"name": "job_id",
|
"name": "job_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the job to get the result of.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -3271,7 +3298,7 @@
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "OK",
|
"description": "The job that was created to run the evaluation.",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -3296,11 +3323,12 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Eval"
|
"Eval"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Run an evaluation on a benchmark.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "benchmark_id",
|
"name": "benchmark_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the benchmark to run the evaluation on.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -3402,7 +3430,7 @@
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"description": "OK",
|
"description": "ScoreResponse object containing rows and aggregated results",
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
|
@ -3427,7 +3455,7 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"Scoring"
|
"Scoring"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Score a list of rows.",
|
||||||
"parameters": [],
|
"parameters": [],
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
|
@ -5192,7 +5220,8 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"agent_config": {
|
"agent_config": {
|
||||||
"$ref": "#/components/schemas/AgentConfig"
|
"$ref": "#/components/schemas/AgentConfig",
|
||||||
|
"description": "The configuration for the agent."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -5218,7 +5247,8 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"session_name": {
|
"session_name": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The name of the session to create."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -5254,10 +5284,12 @@
|
||||||
"$ref": "#/components/schemas/ToolResponseMessage"
|
"$ref": "#/components/schemas/ToolResponseMessage"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
|
"description": "List of messages to start the turn with."
|
||||||
},
|
},
|
||||||
"stream": {
|
"stream": {
|
||||||
"type": "boolean"
|
"type": "boolean",
|
||||||
|
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
|
||||||
},
|
},
|
||||||
"documents": {
|
"documents": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
|
@ -5281,10 +5313,12 @@
|
||||||
{
|
{
|
||||||
"$ref": "#/components/schemas/URL"
|
"$ref": "#/components/schemas/URL"
|
||||||
}
|
}
|
||||||
]
|
],
|
||||||
|
"description": "The content of the document."
|
||||||
},
|
},
|
||||||
"mime_type": {
|
"mime_type": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The MIME type of the document."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -5292,17 +5326,21 @@
|
||||||
"content",
|
"content",
|
||||||
"mime_type"
|
"mime_type"
|
||||||
],
|
],
|
||||||
"title": "Document"
|
"title": "Document",
|
||||||
}
|
"description": "A document to be used by an agent."
|
||||||
|
},
|
||||||
|
"description": "(Optional) List of documents to create the turn with."
|
||||||
},
|
},
|
||||||
"toolgroups": {
|
"toolgroups": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AgentTool"
|
"$ref": "#/components/schemas/AgentTool"
|
||||||
}
|
},
|
||||||
|
"description": "(Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request."
|
||||||
},
|
},
|
||||||
"tool_config": {
|
"tool_config": {
|
||||||
"$ref": "#/components/schemas/ToolConfig"
|
"$ref": "#/components/schemas/ToolConfig",
|
||||||
|
"description": "(Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -5315,18 +5353,22 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"turn_id": {
|
"turn_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The ID of the turn."
|
||||||
},
|
},
|
||||||
"step_id": {
|
"step_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The ID of the step."
|
||||||
},
|
},
|
||||||
"started_at": {
|
"started_at": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"format": "date-time"
|
"format": "date-time",
|
||||||
|
"description": "The time the step started."
|
||||||
},
|
},
|
||||||
"completed_at": {
|
"completed_at": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"format": "date-time"
|
"format": "date-time",
|
||||||
|
"description": "The time the step completed."
|
||||||
},
|
},
|
||||||
"step_type": {
|
"step_type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -5334,7 +5376,8 @@
|
||||||
"default": "inference"
|
"default": "inference"
|
||||||
},
|
},
|
||||||
"model_response": {
|
"model_response": {
|
||||||
"$ref": "#/components/schemas/CompletionMessage"
|
"$ref": "#/components/schemas/CompletionMessage",
|
||||||
|
"description": "The response from the LLM."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -5344,24 +5387,29 @@
|
||||||
"step_type",
|
"step_type",
|
||||||
"model_response"
|
"model_response"
|
||||||
],
|
],
|
||||||
"title": "InferenceStep"
|
"title": "InferenceStep",
|
||||||
|
"description": "An inference step in an agent turn."
|
||||||
},
|
},
|
||||||
"MemoryRetrievalStep": {
|
"MemoryRetrievalStep": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"turn_id": {
|
"turn_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The ID of the turn."
|
||||||
},
|
},
|
||||||
"step_id": {
|
"step_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The ID of the step."
|
||||||
},
|
},
|
||||||
"started_at": {
|
"started_at": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"format": "date-time"
|
"format": "date-time",
|
||||||
|
"description": "The time the step started."
|
||||||
},
|
},
|
||||||
"completed_at": {
|
"completed_at": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"format": "date-time"
|
"format": "date-time",
|
||||||
|
"description": "The time the step completed."
|
||||||
},
|
},
|
||||||
"step_type": {
|
"step_type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -5369,10 +5417,12 @@
|
||||||
"default": "memory_retrieval"
|
"default": "memory_retrieval"
|
||||||
},
|
},
|
||||||
"vector_db_ids": {
|
"vector_db_ids": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The IDs of the vector databases to retrieve context from."
|
||||||
},
|
},
|
||||||
"inserted_context": {
|
"inserted_context": {
|
||||||
"$ref": "#/components/schemas/InterleavedContent"
|
"$ref": "#/components/schemas/InterleavedContent",
|
||||||
|
"description": "The context retrieved from the vector databases."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -5383,7 +5433,8 @@
|
||||||
"vector_db_ids",
|
"vector_db_ids",
|
||||||
"inserted_context"
|
"inserted_context"
|
||||||
],
|
],
|
||||||
"title": "MemoryRetrievalStep"
|
"title": "MemoryRetrievalStep",
|
||||||
|
"description": "A memory retrieval step in an agent turn."
|
||||||
},
|
},
|
||||||
"SafetyViolation": {
|
"SafetyViolation": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -5431,18 +5482,22 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"turn_id": {
|
"turn_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The ID of the turn."
|
||||||
},
|
},
|
||||||
"step_id": {
|
"step_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The ID of the step."
|
||||||
},
|
},
|
||||||
"started_at": {
|
"started_at": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"format": "date-time"
|
"format": "date-time",
|
||||||
|
"description": "The time the step started."
|
||||||
},
|
},
|
||||||
"completed_at": {
|
"completed_at": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"format": "date-time"
|
"format": "date-time",
|
||||||
|
"description": "The time the step completed."
|
||||||
},
|
},
|
||||||
"step_type": {
|
"step_type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -5450,7 +5505,8 @@
|
||||||
"default": "shield_call"
|
"default": "shield_call"
|
||||||
},
|
},
|
||||||
"violation": {
|
"violation": {
|
||||||
"$ref": "#/components/schemas/SafetyViolation"
|
"$ref": "#/components/schemas/SafetyViolation",
|
||||||
|
"description": "The violation from the shield call."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -5459,24 +5515,29 @@
|
||||||
"step_id",
|
"step_id",
|
||||||
"step_type"
|
"step_type"
|
||||||
],
|
],
|
||||||
"title": "ShieldCallStep"
|
"title": "ShieldCallStep",
|
||||||
|
"description": "A shield call step in an agent turn."
|
||||||
},
|
},
|
||||||
"ToolExecutionStep": {
|
"ToolExecutionStep": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"turn_id": {
|
"turn_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The ID of the turn."
|
||||||
},
|
},
|
||||||
"step_id": {
|
"step_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The ID of the step."
|
||||||
},
|
},
|
||||||
"started_at": {
|
"started_at": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"format": "date-time"
|
"format": "date-time",
|
||||||
|
"description": "The time the step started."
|
||||||
},
|
},
|
||||||
"completed_at": {
|
"completed_at": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"format": "date-time"
|
"format": "date-time",
|
||||||
|
"description": "The time the step completed."
|
||||||
},
|
},
|
||||||
"step_type": {
|
"step_type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -5487,13 +5548,15 @@
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/ToolCall"
|
"$ref": "#/components/schemas/ToolCall"
|
||||||
}
|
},
|
||||||
|
"description": "The tool calls to execute."
|
||||||
},
|
},
|
||||||
"tool_responses": {
|
"tool_responses": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/ToolResponse"
|
"$ref": "#/components/schemas/ToolResponse"
|
||||||
}
|
},
|
||||||
|
"description": "The tool responses from the tool calls."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -5504,7 +5567,8 @@
|
||||||
"tool_calls",
|
"tool_calls",
|
||||||
"tool_responses"
|
"tool_responses"
|
||||||
],
|
],
|
||||||
"title": "ToolExecutionStep"
|
"title": "ToolExecutionStep",
|
||||||
|
"description": "A tool execution step in an agent turn."
|
||||||
},
|
},
|
||||||
"ToolResponse": {
|
"ToolResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -5641,10 +5705,12 @@
|
||||||
{
|
{
|
||||||
"$ref": "#/components/schemas/URL"
|
"$ref": "#/components/schemas/URL"
|
||||||
}
|
}
|
||||||
]
|
],
|
||||||
|
"description": "The content of the attachment."
|
||||||
},
|
},
|
||||||
"mime_type": {
|
"mime_type": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The MIME type of the attachment."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -5652,7 +5718,8 @@
|
||||||
"content",
|
"content",
|
||||||
"mime_type"
|
"mime_type"
|
||||||
],
|
],
|
||||||
"title": "Attachment"
|
"title": "Attachment",
|
||||||
|
"description": "An attachment to an agent turn."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"started_at": {
|
"started_at": {
|
||||||
|
@ -5747,7 +5814,8 @@
|
||||||
"shield_call",
|
"shield_call",
|
||||||
"memory_retrieval"
|
"memory_retrieval"
|
||||||
],
|
],
|
||||||
"title": "StepType"
|
"title": "StepType",
|
||||||
|
"description": "Type of the step in an agent turn."
|
||||||
},
|
},
|
||||||
"step_id": {
|
"step_id": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -5803,7 +5871,8 @@
|
||||||
"shield_call",
|
"shield_call",
|
||||||
"memory_retrieval"
|
"memory_retrieval"
|
||||||
],
|
],
|
||||||
"title": "StepType"
|
"title": "StepType",
|
||||||
|
"description": "Type of the step in an agent turn."
|
||||||
},
|
},
|
||||||
"step_id": {
|
"step_id": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -5837,7 +5906,8 @@
|
||||||
"shield_call",
|
"shield_call",
|
||||||
"memory_retrieval"
|
"memory_retrieval"
|
||||||
],
|
],
|
||||||
"title": "StepType"
|
"title": "StepType",
|
||||||
|
"description": "Type of the step in an agent turn."
|
||||||
},
|
},
|
||||||
"step_id": {
|
"step_id": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -6129,7 +6199,8 @@
|
||||||
"default": "agent"
|
"default": "agent"
|
||||||
},
|
},
|
||||||
"config": {
|
"config": {
|
||||||
"$ref": "#/components/schemas/AgentConfig"
|
"$ref": "#/components/schemas/AgentConfig",
|
||||||
|
"description": "The configuration for the agent candidate."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -6137,7 +6208,8 @@
|
||||||
"type",
|
"type",
|
||||||
"config"
|
"config"
|
||||||
],
|
],
|
||||||
"title": "AgentCandidate"
|
"title": "AgentCandidate",
|
||||||
|
"description": "An agent candidate for evaluation."
|
||||||
},
|
},
|
||||||
"AggregationFunctionType": {
|
"AggregationFunctionType": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -6174,16 +6246,19 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"eval_candidate": {
|
"eval_candidate": {
|
||||||
"$ref": "#/components/schemas/EvalCandidate"
|
"$ref": "#/components/schemas/EvalCandidate",
|
||||||
|
"description": "The candidate to evaluate."
|
||||||
},
|
},
|
||||||
"scoring_params": {
|
"scoring_params": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"additionalProperties": {
|
"additionalProperties": {
|
||||||
"$ref": "#/components/schemas/ScoringFnParams"
|
"$ref": "#/components/schemas/ScoringFnParams"
|
||||||
}
|
},
|
||||||
|
"description": "Map between scoring function id and parameters for each scoring function you want to run"
|
||||||
},
|
},
|
||||||
"num_examples": {
|
"num_examples": {
|
||||||
"type": "integer"
|
"type": "integer",
|
||||||
|
"description": "(Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -6191,7 +6266,8 @@
|
||||||
"eval_candidate",
|
"eval_candidate",
|
||||||
"scoring_params"
|
"scoring_params"
|
||||||
],
|
],
|
||||||
"title": "BenchmarkConfig"
|
"title": "BenchmarkConfig",
|
||||||
|
"description": "A benchmark configuration for evaluation."
|
||||||
},
|
},
|
||||||
"EvalCandidate": {
|
"EvalCandidate": {
|
||||||
"oneOf": [
|
"oneOf": [
|
||||||
|
@ -6253,13 +6329,16 @@
|
||||||
"default": "model"
|
"default": "model"
|
||||||
},
|
},
|
||||||
"model": {
|
"model": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The model ID to evaluate."
|
||||||
},
|
},
|
||||||
"sampling_params": {
|
"sampling_params": {
|
||||||
"$ref": "#/components/schemas/SamplingParams"
|
"$ref": "#/components/schemas/SamplingParams",
|
||||||
|
"description": "The sampling parameters for the model."
|
||||||
},
|
},
|
||||||
"system_message": {
|
"system_message": {
|
||||||
"$ref": "#/components/schemas/SystemMessage"
|
"$ref": "#/components/schemas/SystemMessage",
|
||||||
|
"description": "(Optional) The system message providing instructions or context to the model."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -6268,7 +6347,8 @@
|
||||||
"model",
|
"model",
|
||||||
"sampling_params"
|
"sampling_params"
|
||||||
],
|
],
|
||||||
"title": "ModelCandidate"
|
"title": "ModelCandidate",
|
||||||
|
"description": "A model candidate for evaluation."
|
||||||
},
|
},
|
||||||
"RegexParserScoringFnParams": {
|
"RegexParserScoringFnParams": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -6347,16 +6427,19 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "The rows to evaluate."
|
||||||
},
|
},
|
||||||
"scoring_functions": {
|
"scoring_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
},
|
||||||
|
"description": "The scoring functions to use for the evaluation."
|
||||||
},
|
},
|
||||||
"benchmark_config": {
|
"benchmark_config": {
|
||||||
"$ref": "#/components/schemas/BenchmarkConfig"
|
"$ref": "#/components/schemas/BenchmarkConfig",
|
||||||
|
"description": "The configuration for the benchmark."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -6396,13 +6479,15 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "The generations from the evaluation."
|
||||||
},
|
},
|
||||||
"scores": {
|
"scores": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"additionalProperties": {
|
"additionalProperties": {
|
||||||
"$ref": "#/components/schemas/ScoringResult"
|
"$ref": "#/components/schemas/ScoringResult"
|
||||||
}
|
},
|
||||||
|
"description": "The scores from the evaluation."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -6410,7 +6495,8 @@
|
||||||
"generations",
|
"generations",
|
||||||
"scores"
|
"scores"
|
||||||
],
|
],
|
||||||
"title": "EvaluateResponse"
|
"title": "EvaluateResponse",
|
||||||
|
"description": "The response from an evaluation."
|
||||||
},
|
},
|
||||||
"ScoringResult": {
|
"ScoringResult": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -6441,7 +6527,8 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "The scoring result for each row. Each row is a map of column name to value."
|
||||||
},
|
},
|
||||||
"aggregated_results": {
|
"aggregated_results": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -6466,7 +6553,8 @@
|
||||||
"type": "object"
|
"type": "object"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
|
"description": "Map of metric name to aggregated value"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -6474,7 +6562,8 @@
|
||||||
"score_rows",
|
"score_rows",
|
||||||
"aggregated_results"
|
"aggregated_results"
|
||||||
],
|
],
|
||||||
"title": "ScoringResult"
|
"title": "ScoringResult",
|
||||||
|
"description": "A scoring result for a single row."
|
||||||
},
|
},
|
||||||
"Session": {
|
"Session": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -6963,13 +7052,16 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "The rows in the current page."
|
||||||
},
|
},
|
||||||
"total_count": {
|
"total_count": {
|
||||||
"type": "integer"
|
"type": "integer",
|
||||||
|
"description": "The total number of rows in the dataset."
|
||||||
},
|
},
|
||||||
"next_page_token": {
|
"next_page_token": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The token to get the next page of rows."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -6977,7 +7069,8 @@
|
||||||
"rows",
|
"rows",
|
||||||
"total_count"
|
"total_count"
|
||||||
],
|
],
|
||||||
"title": "PaginatedRowsResult"
|
"title": "PaginatedRowsResult",
|
||||||
|
"description": "A paginated list of rows from a dataset."
|
||||||
},
|
},
|
||||||
"ScoringFn": {
|
"ScoringFn": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -9249,7 +9342,8 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"benchmark_config": {
|
"benchmark_config": {
|
||||||
"$ref": "#/components/schemas/BenchmarkConfig"
|
"$ref": "#/components/schemas/BenchmarkConfig",
|
||||||
|
"description": "The configuration for the benchmark."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -9386,7 +9480,8 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "The rows to score."
|
||||||
},
|
},
|
||||||
"scoring_functions": {
|
"scoring_functions": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -9399,7 +9494,8 @@
|
||||||
"type": "null"
|
"type": "null"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
|
"description": "The scoring functions to use for the scoring."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -9416,14 +9512,16 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"additionalProperties": {
|
"additionalProperties": {
|
||||||
"$ref": "#/components/schemas/ScoringResult"
|
"$ref": "#/components/schemas/ScoringResult"
|
||||||
}
|
},
|
||||||
|
"description": "A map of scoring function name to ScoringResult."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"results"
|
"results"
|
||||||
],
|
],
|
||||||
"title": "ScoreResponse"
|
"title": "ScoreResponse",
|
||||||
|
"description": "The response from scoring."
|
||||||
},
|
},
|
||||||
"ScoreBatchRequest": {
|
"ScoreBatchRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -9838,7 +9936,8 @@
|
||||||
"name": "Datasets"
|
"name": "Datasets"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Eval"
|
"name": "Eval",
|
||||||
|
"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Files (Coming Soon)"
|
"name": "Files (Coming Soon)"
|
||||||
|
|
195
docs/_static/llama-stack-spec.yaml
vendored
195
docs/_static/llama-stack-spec.yaml
vendored
|
@ -31,25 +31,32 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- DatasetIO
|
- DatasetIO
|
||||||
description: ''
|
description: >-
|
||||||
|
Get a paginated list of rows from a dataset.
|
||||||
parameters:
|
parameters:
|
||||||
- name: dataset_id
|
- name: dataset_id
|
||||||
in: query
|
in: query
|
||||||
|
description: >-
|
||||||
|
The ID of the dataset to get the rows from.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: rows_in_page
|
- name: rows_in_page
|
||||||
in: query
|
in: query
|
||||||
|
description: The number of rows to get per page.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: integer
|
type: integer
|
||||||
- name: page_token
|
- name: page_token
|
||||||
in: query
|
in: query
|
||||||
|
description: The token to get the next page of rows.
|
||||||
required: false
|
required: false
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: filter_condition
|
- name: filter_condition
|
||||||
in: query
|
in: query
|
||||||
|
description: >-
|
||||||
|
(Optional) A condition to filter the rows by.
|
||||||
required: false
|
required: false
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -234,7 +241,8 @@ paths:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: OK
|
description: >-
|
||||||
|
An AgentCreateResponse with the agent ID.
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
|
@ -251,7 +259,8 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
description: ''
|
description: >-
|
||||||
|
Create an agent with the given configuration.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
@ -263,7 +272,7 @@ paths:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: OK
|
description: An AgentSessionCreateResponse.
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
|
@ -280,10 +289,12 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
description: ''
|
description: Create a new session for an agent.
|
||||||
parameters:
|
parameters:
|
||||||
- name: agent_id
|
- name: agent_id
|
||||||
in: path
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the agent to create the session for.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -298,8 +309,8 @@ paths:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: >-
|
description: >-
|
||||||
A single turn in an interaction with an Agentic System. **OR** streamed
|
If stream=False, returns a Turn object. If stream=True, returns an SSE
|
||||||
agent turn completion response.
|
event stream of AgentTurnResponseStreamChunk objects.
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
|
@ -319,15 +330,19 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
description: ''
|
description: Create a new turn for an agent.
|
||||||
parameters:
|
parameters:
|
||||||
- name: agent_id
|
- name: agent_id
|
||||||
in: path
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the agent to create the turn for.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: session_id
|
- name: session_id
|
||||||
in: path
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the session to create the turn for.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -411,10 +426,11 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
description: ''
|
description: Delete an agent by its ID.
|
||||||
parameters:
|
parameters:
|
||||||
- name: agent_id
|
- name: agent_id
|
||||||
in: path
|
in: path
|
||||||
|
description: The ID of the agent to delete.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -439,20 +455,25 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
description: ''
|
description: Retrieve an agent session by its ID.
|
||||||
parameters:
|
parameters:
|
||||||
- name: session_id
|
- name: session_id
|
||||||
in: path
|
in: path
|
||||||
|
description: The ID of the session to get.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: agent_id
|
- name: agent_id
|
||||||
in: path
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the agent to get the session for.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: turn_ids
|
- name: turn_ids
|
||||||
in: query
|
in: query
|
||||||
|
description: >-
|
||||||
|
(Optional) List of turn IDs to filter the session by.
|
||||||
required: false
|
required: false
|
||||||
schema:
|
schema:
|
||||||
type: array
|
type: array
|
||||||
|
@ -474,15 +495,18 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
description: ''
|
description: Delete an agent session by its ID.
|
||||||
parameters:
|
parameters:
|
||||||
- name: session_id
|
- name: session_id
|
||||||
in: path
|
in: path
|
||||||
|
description: The ID of the session to delete.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: agent_id
|
- name: agent_id
|
||||||
in: path
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the agent to delete the session for.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -596,7 +620,8 @@ paths:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: OK
|
description: >-
|
||||||
|
EvaluateResponse object containing generations and scores
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
|
@ -613,10 +638,12 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Eval
|
- Eval
|
||||||
description: ''
|
description: Evaluate a list of rows on a benchmark.
|
||||||
parameters:
|
parameters:
|
||||||
- name: benchmark_id
|
- name: benchmark_id
|
||||||
in: path
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the benchmark to run the evaluation on.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -630,7 +657,7 @@ paths:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: OK
|
description: An AgentStepResponse.
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
|
@ -647,25 +674,30 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
description: ''
|
description: Retrieve an agent step by its ID.
|
||||||
parameters:
|
parameters:
|
||||||
- name: agent_id
|
- name: agent_id
|
||||||
in: path
|
in: path
|
||||||
|
description: The ID of the agent to get the step for.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: session_id
|
- name: session_id
|
||||||
in: path
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the session to get the step for.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: turn_id
|
- name: turn_id
|
||||||
in: path
|
in: path
|
||||||
|
description: The ID of the turn to get the step for.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: step_id
|
- name: step_id
|
||||||
in: path
|
in: path
|
||||||
|
description: The ID of the step to get.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -673,7 +705,7 @@ paths:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: OK
|
description: A Turn.
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
|
@ -690,20 +722,24 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Agents
|
- Agents
|
||||||
description: ''
|
description: Retrieve an agent turn by its ID.
|
||||||
parameters:
|
parameters:
|
||||||
- name: agent_id
|
- name: agent_id
|
||||||
in: path
|
in: path
|
||||||
|
description: The ID of the agent to get the turn for.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: session_id
|
- name: session_id
|
||||||
in: path
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the session to get the turn for.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: turn_id
|
- name: turn_id
|
||||||
in: path
|
in: path
|
||||||
|
description: The ID of the turn to get.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -1391,7 +1427,7 @@ paths:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: OK
|
description: The status of the evaluation job.
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
|
@ -1410,15 +1446,18 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Eval
|
- Eval
|
||||||
description: ''
|
description: Get the status of a job.
|
||||||
parameters:
|
parameters:
|
||||||
- name: benchmark_id
|
- name: benchmark_id
|
||||||
in: path
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the benchmark to run the evaluation on.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: job_id
|
- name: job_id
|
||||||
in: path
|
in: path
|
||||||
|
description: The ID of the job to get the status of.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -1438,15 +1477,18 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Eval
|
- Eval
|
||||||
description: ''
|
description: Cancel a job.
|
||||||
parameters:
|
parameters:
|
||||||
- name: benchmark_id
|
- name: benchmark_id
|
||||||
in: path
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the benchmark to run the evaluation on.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: job_id
|
- name: job_id
|
||||||
in: path
|
in: path
|
||||||
|
description: The ID of the job to cancel.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -1454,7 +1496,7 @@ paths:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: OK
|
description: The result of the job.
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
|
@ -1471,15 +1513,18 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Eval
|
- Eval
|
||||||
description: ''
|
description: Get the result of a job.
|
||||||
parameters:
|
parameters:
|
||||||
- name: benchmark_id
|
- name: benchmark_id
|
||||||
in: path
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the benchmark to run the evaluation on.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
- name: job_id
|
- name: job_id
|
||||||
in: path
|
in: path
|
||||||
|
description: The ID of the job to get the result of.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -2192,7 +2237,8 @@ paths:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: OK
|
description: >-
|
||||||
|
The job that was created to run the evaluation.
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
|
@ -2209,10 +2255,12 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Eval
|
- Eval
|
||||||
description: ''
|
description: Run an evaluation on a benchmark.
|
||||||
parameters:
|
parameters:
|
||||||
- name: benchmark_id
|
- name: benchmark_id
|
||||||
in: path
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the benchmark to run the evaluation on.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -2280,7 +2328,8 @@ paths:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: OK
|
description: >-
|
||||||
|
ScoreResponse object containing rows and aggregated results
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
|
@ -2297,7 +2346,7 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- Scoring
|
- Scoring
|
||||||
description: ''
|
description: Score a list of rows.
|
||||||
parameters: []
|
parameters: []
|
||||||
requestBody:
|
requestBody:
|
||||||
content:
|
content:
|
||||||
|
@ -3567,6 +3616,7 @@ components:
|
||||||
properties:
|
properties:
|
||||||
agent_config:
|
agent_config:
|
||||||
$ref: '#/components/schemas/AgentConfig'
|
$ref: '#/components/schemas/AgentConfig'
|
||||||
|
description: The configuration for the agent.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- agent_config
|
- agent_config
|
||||||
|
@ -3585,6 +3635,7 @@ components:
|
||||||
properties:
|
properties:
|
||||||
session_name:
|
session_name:
|
||||||
type: string
|
type: string
|
||||||
|
description: The name of the session to create.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- session_name
|
- session_name
|
||||||
|
@ -3607,8 +3658,12 @@ components:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/UserMessage'
|
- $ref: '#/components/schemas/UserMessage'
|
||||||
- $ref: '#/components/schemas/ToolResponseMessage'
|
- $ref: '#/components/schemas/ToolResponseMessage'
|
||||||
|
description: List of messages to start the turn with.
|
||||||
stream:
|
stream:
|
||||||
type: boolean
|
type: boolean
|
||||||
|
description: >-
|
||||||
|
(Optional) If True, generate an SSE event stream of the response. Defaults
|
||||||
|
to False.
|
||||||
documents:
|
documents:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
|
@ -3622,19 +3677,30 @@ components:
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/InterleavedContentItem'
|
$ref: '#/components/schemas/InterleavedContentItem'
|
||||||
- $ref: '#/components/schemas/URL'
|
- $ref: '#/components/schemas/URL'
|
||||||
|
description: The content of the document.
|
||||||
mime_type:
|
mime_type:
|
||||||
type: string
|
type: string
|
||||||
|
description: The MIME type of the document.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- content
|
- content
|
||||||
- mime_type
|
- mime_type
|
||||||
title: Document
|
title: Document
|
||||||
|
description: A document to be used by an agent.
|
||||||
|
description: >-
|
||||||
|
(Optional) List of documents to create the turn with.
|
||||||
toolgroups:
|
toolgroups:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AgentTool'
|
$ref: '#/components/schemas/AgentTool'
|
||||||
|
description: >-
|
||||||
|
(Optional) List of toolgroups to create the turn with, will be used in
|
||||||
|
addition to the agent's config toolgroups for the request.
|
||||||
tool_config:
|
tool_config:
|
||||||
$ref: '#/components/schemas/ToolConfig'
|
$ref: '#/components/schemas/ToolConfig'
|
||||||
|
description: >-
|
||||||
|
(Optional) The tool configuration to create the turn with, will be used
|
||||||
|
to override the agent's tool_config.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- messages
|
- messages
|
||||||
|
@ -3644,20 +3710,25 @@ components:
|
||||||
properties:
|
properties:
|
||||||
turn_id:
|
turn_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The ID of the turn.
|
||||||
step_id:
|
step_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The ID of the step.
|
||||||
started_at:
|
started_at:
|
||||||
type: string
|
type: string
|
||||||
format: date-time
|
format: date-time
|
||||||
|
description: The time the step started.
|
||||||
completed_at:
|
completed_at:
|
||||||
type: string
|
type: string
|
||||||
format: date-time
|
format: date-time
|
||||||
|
description: The time the step completed.
|
||||||
step_type:
|
step_type:
|
||||||
type: string
|
type: string
|
||||||
const: inference
|
const: inference
|
||||||
default: inference
|
default: inference
|
||||||
model_response:
|
model_response:
|
||||||
$ref: '#/components/schemas/CompletionMessage'
|
$ref: '#/components/schemas/CompletionMessage'
|
||||||
|
description: The response from the LLM.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- turn_id
|
- turn_id
|
||||||
|
@ -3665,27 +3736,36 @@ components:
|
||||||
- step_type
|
- step_type
|
||||||
- model_response
|
- model_response
|
||||||
title: InferenceStep
|
title: InferenceStep
|
||||||
|
description: An inference step in an agent turn.
|
||||||
MemoryRetrievalStep:
|
MemoryRetrievalStep:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
turn_id:
|
turn_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The ID of the turn.
|
||||||
step_id:
|
step_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The ID of the step.
|
||||||
started_at:
|
started_at:
|
||||||
type: string
|
type: string
|
||||||
format: date-time
|
format: date-time
|
||||||
|
description: The time the step started.
|
||||||
completed_at:
|
completed_at:
|
||||||
type: string
|
type: string
|
||||||
format: date-time
|
format: date-time
|
||||||
|
description: The time the step completed.
|
||||||
step_type:
|
step_type:
|
||||||
type: string
|
type: string
|
||||||
const: memory_retrieval
|
const: memory_retrieval
|
||||||
default: memory_retrieval
|
default: memory_retrieval
|
||||||
vector_db_ids:
|
vector_db_ids:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
The IDs of the vector databases to retrieve context from.
|
||||||
inserted_context:
|
inserted_context:
|
||||||
$ref: '#/components/schemas/InterleavedContent'
|
$ref: '#/components/schemas/InterleavedContent'
|
||||||
|
description: >-
|
||||||
|
The context retrieved from the vector databases.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- turn_id
|
- turn_id
|
||||||
|
@ -3694,6 +3774,8 @@ components:
|
||||||
- vector_db_ids
|
- vector_db_ids
|
||||||
- inserted_context
|
- inserted_context
|
||||||
title: MemoryRetrievalStep
|
title: MemoryRetrievalStep
|
||||||
|
description: >-
|
||||||
|
A memory retrieval step in an agent turn.
|
||||||
SafetyViolation:
|
SafetyViolation:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -3721,39 +3803,49 @@ components:
|
||||||
properties:
|
properties:
|
||||||
turn_id:
|
turn_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The ID of the turn.
|
||||||
step_id:
|
step_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The ID of the step.
|
||||||
started_at:
|
started_at:
|
||||||
type: string
|
type: string
|
||||||
format: date-time
|
format: date-time
|
||||||
|
description: The time the step started.
|
||||||
completed_at:
|
completed_at:
|
||||||
type: string
|
type: string
|
||||||
format: date-time
|
format: date-time
|
||||||
|
description: The time the step completed.
|
||||||
step_type:
|
step_type:
|
||||||
type: string
|
type: string
|
||||||
const: shield_call
|
const: shield_call
|
||||||
default: shield_call
|
default: shield_call
|
||||||
violation:
|
violation:
|
||||||
$ref: '#/components/schemas/SafetyViolation'
|
$ref: '#/components/schemas/SafetyViolation'
|
||||||
|
description: The violation from the shield call.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- turn_id
|
- turn_id
|
||||||
- step_id
|
- step_id
|
||||||
- step_type
|
- step_type
|
||||||
title: ShieldCallStep
|
title: ShieldCallStep
|
||||||
|
description: A shield call step in an agent turn.
|
||||||
ToolExecutionStep:
|
ToolExecutionStep:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
turn_id:
|
turn_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The ID of the turn.
|
||||||
step_id:
|
step_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The ID of the step.
|
||||||
started_at:
|
started_at:
|
||||||
type: string
|
type: string
|
||||||
format: date-time
|
format: date-time
|
||||||
|
description: The time the step started.
|
||||||
completed_at:
|
completed_at:
|
||||||
type: string
|
type: string
|
||||||
format: date-time
|
format: date-time
|
||||||
|
description: The time the step completed.
|
||||||
step_type:
|
step_type:
|
||||||
type: string
|
type: string
|
||||||
const: tool_execution
|
const: tool_execution
|
||||||
|
@ -3762,10 +3854,12 @@ components:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/ToolCall'
|
$ref: '#/components/schemas/ToolCall'
|
||||||
|
description: The tool calls to execute.
|
||||||
tool_responses:
|
tool_responses:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/ToolResponse'
|
$ref: '#/components/schemas/ToolResponse'
|
||||||
|
description: The tool responses from the tool calls.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- turn_id
|
- turn_id
|
||||||
|
@ -3774,6 +3868,7 @@ components:
|
||||||
- tool_calls
|
- tool_calls
|
||||||
- tool_responses
|
- tool_responses
|
||||||
title: ToolExecutionStep
|
title: ToolExecutionStep
|
||||||
|
description: A tool execution step in an agent turn.
|
||||||
ToolResponse:
|
ToolResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -3850,13 +3945,16 @@ components:
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/InterleavedContentItem'
|
$ref: '#/components/schemas/InterleavedContentItem'
|
||||||
- $ref: '#/components/schemas/URL'
|
- $ref: '#/components/schemas/URL'
|
||||||
|
description: The content of the attachment.
|
||||||
mime_type:
|
mime_type:
|
||||||
type: string
|
type: string
|
||||||
|
description: The MIME type of the attachment.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- content
|
- content
|
||||||
- mime_type
|
- mime_type
|
||||||
title: Attachment
|
title: Attachment
|
||||||
|
description: An attachment to an agent turn.
|
||||||
started_at:
|
started_at:
|
||||||
type: string
|
type: string
|
||||||
format: date-time
|
format: date-time
|
||||||
|
@ -3922,6 +4020,7 @@ components:
|
||||||
- shield_call
|
- shield_call
|
||||||
- memory_retrieval
|
- memory_retrieval
|
||||||
title: StepType
|
title: StepType
|
||||||
|
description: Type of the step in an agent turn.
|
||||||
step_id:
|
step_id:
|
||||||
type: string
|
type: string
|
||||||
step_details:
|
step_details:
|
||||||
|
@ -3959,6 +4058,7 @@ components:
|
||||||
- shield_call
|
- shield_call
|
||||||
- memory_retrieval
|
- memory_retrieval
|
||||||
title: StepType
|
title: StepType
|
||||||
|
description: Type of the step in an agent turn.
|
||||||
step_id:
|
step_id:
|
||||||
type: string
|
type: string
|
||||||
delta:
|
delta:
|
||||||
|
@ -3985,6 +4085,7 @@ components:
|
||||||
- shield_call
|
- shield_call
|
||||||
- memory_retrieval
|
- memory_retrieval
|
||||||
title: StepType
|
title: StepType
|
||||||
|
description: Type of the step in an agent turn.
|
||||||
step_id:
|
step_id:
|
||||||
type: string
|
type: string
|
||||||
metadata:
|
metadata:
|
||||||
|
@ -4212,11 +4313,14 @@ components:
|
||||||
default: agent
|
default: agent
|
||||||
config:
|
config:
|
||||||
$ref: '#/components/schemas/AgentConfig'
|
$ref: '#/components/schemas/AgentConfig'
|
||||||
|
description: >-
|
||||||
|
The configuration for the agent candidate.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- config
|
- config
|
||||||
title: AgentCandidate
|
title: AgentCandidate
|
||||||
|
description: An agent candidate for evaluation.
|
||||||
AggregationFunctionType:
|
AggregationFunctionType:
|
||||||
type: string
|
type: string
|
||||||
enum:
|
enum:
|
||||||
|
@ -4245,17 +4349,26 @@ components:
|
||||||
properties:
|
properties:
|
||||||
eval_candidate:
|
eval_candidate:
|
||||||
$ref: '#/components/schemas/EvalCandidate'
|
$ref: '#/components/schemas/EvalCandidate'
|
||||||
|
description: The candidate to evaluate.
|
||||||
scoring_params:
|
scoring_params:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
$ref: '#/components/schemas/ScoringFnParams'
|
$ref: '#/components/schemas/ScoringFnParams'
|
||||||
|
description: >-
|
||||||
|
Map between scoring function id and parameters for each scoring function
|
||||||
|
you want to run
|
||||||
num_examples:
|
num_examples:
|
||||||
type: integer
|
type: integer
|
||||||
|
description: >-
|
||||||
|
(Optional) The number of examples to evaluate. If not provided, all examples
|
||||||
|
in the dataset will be evaluated
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- eval_candidate
|
- eval_candidate
|
||||||
- scoring_params
|
- scoring_params
|
||||||
title: BenchmarkConfig
|
title: BenchmarkConfig
|
||||||
|
description: >-
|
||||||
|
A benchmark configuration for evaluation.
|
||||||
EvalCandidate:
|
EvalCandidate:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/ModelCandidate'
|
- $ref: '#/components/schemas/ModelCandidate'
|
||||||
|
@ -4298,16 +4411,22 @@ components:
|
||||||
default: model
|
default: model
|
||||||
model:
|
model:
|
||||||
type: string
|
type: string
|
||||||
|
description: The model ID to evaluate.
|
||||||
sampling_params:
|
sampling_params:
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
$ref: '#/components/schemas/SamplingParams'
|
||||||
|
description: The sampling parameters for the model.
|
||||||
system_message:
|
system_message:
|
||||||
$ref: '#/components/schemas/SystemMessage'
|
$ref: '#/components/schemas/SystemMessage'
|
||||||
|
description: >-
|
||||||
|
(Optional) The system message providing instructions or context to the
|
||||||
|
model.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- model
|
- model
|
||||||
- sampling_params
|
- sampling_params
|
||||||
title: ModelCandidate
|
title: ModelCandidate
|
||||||
|
description: A model candidate for evaluation.
|
||||||
RegexParserScoringFnParams:
|
RegexParserScoringFnParams:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -4353,12 +4472,16 @@ components:
|
||||||
- type: string
|
- type: string
|
||||||
- type: array
|
- type: array
|
||||||
- type: object
|
- type: object
|
||||||
|
description: The rows to evaluate.
|
||||||
scoring_functions:
|
scoring_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
The scoring functions to use for the evaluation.
|
||||||
benchmark_config:
|
benchmark_config:
|
||||||
$ref: '#/components/schemas/BenchmarkConfig'
|
$ref: '#/components/schemas/BenchmarkConfig'
|
||||||
|
description: The configuration for the benchmark.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- input_rows
|
- input_rows
|
||||||
|
@ -4380,15 +4503,18 @@ components:
|
||||||
- type: string
|
- type: string
|
||||||
- type: array
|
- type: array
|
||||||
- type: object
|
- type: object
|
||||||
|
description: The generations from the evaluation.
|
||||||
scores:
|
scores:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
$ref: '#/components/schemas/ScoringResult'
|
$ref: '#/components/schemas/ScoringResult'
|
||||||
|
description: The scores from the evaluation.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- generations
|
- generations
|
||||||
- scores
|
- scores
|
||||||
title: EvaluateResponse
|
title: EvaluateResponse
|
||||||
|
description: The response from an evaluation.
|
||||||
ScoringResult:
|
ScoringResult:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -4404,6 +4530,8 @@ components:
|
||||||
- type: string
|
- type: string
|
||||||
- type: array
|
- type: array
|
||||||
- type: object
|
- type: object
|
||||||
|
description: >-
|
||||||
|
The scoring result for each row. Each row is a map of column name to value.
|
||||||
aggregated_results:
|
aggregated_results:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
|
@ -4414,11 +4542,13 @@ components:
|
||||||
- type: string
|
- type: string
|
||||||
- type: array
|
- type: array
|
||||||
- type: object
|
- type: object
|
||||||
|
description: Map of metric name to aggregated value
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- score_rows
|
- score_rows
|
||||||
- aggregated_results
|
- aggregated_results
|
||||||
title: ScoringResult
|
title: ScoringResult
|
||||||
|
description: A scoring result for a single scoring function, covering every scored row.
|
||||||
Session:
|
Session:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -4731,15 +4861,19 @@ components:
|
||||||
- type: string
|
- type: string
|
||||||
- type: array
|
- type: array
|
||||||
- type: object
|
- type: object
|
||||||
|
description: The rows in the current page.
|
||||||
total_count:
|
total_count:
|
||||||
type: integer
|
type: integer
|
||||||
|
description: The total number of rows in the dataset.
|
||||||
next_page_token:
|
next_page_token:
|
||||||
type: string
|
type: string
|
||||||
|
description: The token to get the next page of rows.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- rows
|
- rows
|
||||||
- total_count
|
- total_count
|
||||||
title: PaginatedRowsResult
|
title: PaginatedRowsResult
|
||||||
|
description: A paginated list of rows from a dataset.
|
||||||
ScoringFn:
|
ScoringFn:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -6170,6 +6304,7 @@ components:
|
||||||
properties:
|
properties:
|
||||||
benchmark_config:
|
benchmark_config:
|
||||||
$ref: '#/components/schemas/BenchmarkConfig'
|
$ref: '#/components/schemas/BenchmarkConfig'
|
||||||
|
description: The configuration for the benchmark.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- benchmark_config
|
- benchmark_config
|
||||||
|
@ -6251,12 +6386,15 @@ components:
|
||||||
- type: string
|
- type: string
|
||||||
- type: array
|
- type: array
|
||||||
- type: object
|
- type: object
|
||||||
|
description: The rows to score.
|
||||||
scoring_functions:
|
scoring_functions:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/ScoringFnParams'
|
- $ref: '#/components/schemas/ScoringFnParams'
|
||||||
- type: 'null'
|
- type: 'null'
|
||||||
|
description: >-
|
||||||
|
The scoring functions to use for the scoring.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- input_rows
|
- input_rows
|
||||||
|
@ -6269,10 +6407,13 @@ components:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
$ref: '#/components/schemas/ScoringResult'
|
$ref: '#/components/schemas/ScoringResult'
|
||||||
|
description: >-
|
||||||
|
A map of scoring function name to ScoringResult.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- results
|
- results
|
||||||
title: ScoreResponse
|
title: ScoreResponse
|
||||||
|
description: The response from scoring.
|
||||||
ScoreBatchRequest:
|
ScoreBatchRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -6543,6 +6684,8 @@ tags:
|
||||||
- name: DatasetIO
|
- name: DatasetIO
|
||||||
- name: Datasets
|
- name: Datasets
|
||||||
- name: Eval
|
- name: Eval
|
||||||
|
x-displayName: >-
|
||||||
|
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||||
- name: Files (Coming Soon)
|
- name: Files (Coming Soon)
|
||||||
- name: Inference
|
- name: Inference
|
||||||
description: >-
|
description: >-
|
||||||
|
|
|
@ -41,16 +41,36 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
|
||||||
|
|
||||||
|
|
||||||
class Attachment(BaseModel):
|
class Attachment(BaseModel):
|
||||||
|
"""An attachment to an agent turn.
|
||||||
|
|
||||||
|
:param content: The content of the attachment.
|
||||||
|
:param mime_type: The MIME type of the attachment.
|
||||||
|
"""
|
||||||
|
|
||||||
content: InterleavedContent | URL
|
content: InterleavedContent | URL
|
||||||
mime_type: str
|
mime_type: str
|
||||||
|
|
||||||
|
|
||||||
class Document(BaseModel):
|
class Document(BaseModel):
|
||||||
|
"""A document to be used by an agent.
|
||||||
|
|
||||||
|
:param content: The content of the document.
|
||||||
|
:param mime_type: The MIME type of the document.
|
||||||
|
"""
|
||||||
|
|
||||||
content: InterleavedContent | URL
|
content: InterleavedContent | URL
|
||||||
mime_type: str
|
mime_type: str
|
||||||
|
|
||||||
|
|
||||||
class StepCommon(BaseModel):
|
class StepCommon(BaseModel):
|
||||||
|
"""A common step in an agent turn.
|
||||||
|
|
||||||
|
:param turn_id: The ID of the turn.
|
||||||
|
:param step_id: The ID of the step.
|
||||||
|
:param started_at: The time the step started.
|
||||||
|
:param completed_at: The time the step completed.
|
||||||
|
"""
|
||||||
|
|
||||||
turn_id: str
|
turn_id: str
|
||||||
step_id: str
|
step_id: str
|
||||||
started_at: Optional[datetime] = None
|
started_at: Optional[datetime] = None
|
||||||
|
@ -58,6 +78,14 @@ class StepCommon(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class StepType(Enum):
|
class StepType(Enum):
|
||||||
|
"""Type of the step in an agent turn.
|
||||||
|
|
||||||
|
:cvar inference: The step is an inference step that calls an LLM.
|
||||||
|
:cvar tool_execution: The step is a tool execution step that executes a tool call.
|
||||||
|
:cvar shield_call: The step is a shield call step that checks for safety violations.
|
||||||
|
:cvar memory_retrieval: The step is a memory retrieval step that retrieves context from vector databases.
|
||||||
|
"""
|
||||||
|
|
||||||
inference = "inference"
|
inference = "inference"
|
||||||
tool_execution = "tool_execution"
|
tool_execution = "tool_execution"
|
||||||
shield_call = "shield_call"
|
shield_call = "shield_call"
|
||||||
|
@ -66,6 +94,11 @@ class StepType(Enum):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class InferenceStep(StepCommon):
|
class InferenceStep(StepCommon):
|
||||||
|
"""An inference step in an agent turn.
|
||||||
|
|
||||||
|
:param model_response: The response from the LLM.
|
||||||
|
"""
|
||||||
|
|
||||||
model_config = ConfigDict(protected_namespaces=())
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
|
|
||||||
step_type: Literal[StepType.inference.value] = StepType.inference.value
|
step_type: Literal[StepType.inference.value] = StepType.inference.value
|
||||||
|
@ -74,6 +107,12 @@ class InferenceStep(StepCommon):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ToolExecutionStep(StepCommon):
|
class ToolExecutionStep(StepCommon):
|
||||||
|
"""A tool execution step in an agent turn.
|
||||||
|
|
||||||
|
:param tool_calls: The tool calls to execute.
|
||||||
|
:param tool_responses: The tool responses from the tool calls.
|
||||||
|
"""
|
||||||
|
|
||||||
step_type: Literal[StepType.tool_execution.value] = StepType.tool_execution.value
|
step_type: Literal[StepType.tool_execution.value] = StepType.tool_execution.value
|
||||||
tool_calls: List[ToolCall]
|
tool_calls: List[ToolCall]
|
||||||
tool_responses: List[ToolResponse]
|
tool_responses: List[ToolResponse]
|
||||||
|
@ -81,13 +120,25 @@ class ToolExecutionStep(StepCommon):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ShieldCallStep(StepCommon):
|
class ShieldCallStep(StepCommon):
|
||||||
|
"""A shield call step in an agent turn.
|
||||||
|
|
||||||
|
:param violation: The violation from the shield call.
|
||||||
|
"""
|
||||||
|
|
||||||
step_type: Literal[StepType.shield_call.value] = StepType.shield_call.value
|
step_type: Literal[StepType.shield_call.value] = StepType.shield_call.value
|
||||||
violation: Optional[SafetyViolation]
|
violation: Optional[SafetyViolation]
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class MemoryRetrievalStep(StepCommon):
|
class MemoryRetrievalStep(StepCommon):
|
||||||
|
"""A memory retrieval step in an agent turn.
|
||||||
|
|
||||||
|
:param vector_db_ids: The IDs of the vector databases to retrieve context from.
|
||||||
|
:param inserted_context: The context retrieved from the vector databases.
|
||||||
|
"""
|
||||||
|
|
||||||
step_type: Literal[StepType.memory_retrieval.value] = StepType.memory_retrieval.value
|
step_type: Literal[StepType.memory_retrieval.value] = StepType.memory_retrieval.value
|
||||||
|
# TODO: should this be List[str]?
|
||||||
vector_db_ids: str
|
vector_db_ids: str
|
||||||
inserted_context: InterleavedContent
|
inserted_context: InterleavedContent
|
||||||
|
|
||||||
|
@ -335,7 +386,13 @@ class Agents(Protocol):
|
||||||
async def create_agent(
|
async def create_agent(
|
||||||
self,
|
self,
|
||||||
agent_config: AgentConfig,
|
agent_config: AgentConfig,
|
||||||
) -> AgentCreateResponse: ...
|
) -> AgentCreateResponse:
|
||||||
|
"""Create an agent with the given configuration.
|
||||||
|
|
||||||
|
:param agent_config: The configuration for the agent.
|
||||||
|
:returns: An AgentCreateResponse with the agent ID.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
@webmethod(route="/agents/{agent_id}/session/{session_id}/turn", method="POST")
|
@webmethod(route="/agents/{agent_id}/session/{session_id}/turn", method="POST")
|
||||||
async def create_agent_turn(
|
async def create_agent_turn(
|
||||||
|
@ -352,7 +409,19 @@ class Agents(Protocol):
|
||||||
documents: Optional[List[Document]] = None,
|
documents: Optional[List[Document]] = None,
|
||||||
toolgroups: Optional[List[AgentToolGroup]] = None,
|
toolgroups: Optional[List[AgentToolGroup]] = None,
|
||||||
tool_config: Optional[ToolConfig] = None,
|
tool_config: Optional[ToolConfig] = None,
|
||||||
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...
|
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]:
|
||||||
|
"""Create a new turn for an agent.
|
||||||
|
|
||||||
|
:param agent_id: The ID of the agent to create the turn for.
|
||||||
|
:param session_id: The ID of the session to create the turn for.
|
||||||
|
:param messages: List of messages to start the turn with.
|
||||||
|
:param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
|
||||||
|
:param documents: (Optional) List of documents to create the turn with.
|
||||||
|
:param toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request.
|
||||||
|
:param tool_config: (Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config.
|
||||||
|
:returns: If stream=False, returns a Turn object.
|
||||||
|
If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk
|
||||||
|
"""
|
||||||
|
|
||||||
@webmethod(
|
@webmethod(
|
||||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
|
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
|
||||||
|
@ -388,7 +457,15 @@ class Agents(Protocol):
|
||||||
agent_id: str,
|
agent_id: str,
|
||||||
session_id: str,
|
session_id: str,
|
||||||
turn_id: str,
|
turn_id: str,
|
||||||
) -> Turn: ...
|
) -> Turn:
|
||||||
|
"""Retrieve an agent turn by its ID.
|
||||||
|
|
||||||
|
:param agent_id: The ID of the agent to get the turn for.
|
||||||
|
:param session_id: The ID of the session to get the turn for.
|
||||||
|
:param turn_id: The ID of the turn to get.
|
||||||
|
:returns: A Turn.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
@webmethod(
|
@webmethod(
|
||||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
|
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
|
||||||
|
@ -400,14 +477,30 @@ class Agents(Protocol):
|
||||||
session_id: str,
|
session_id: str,
|
||||||
turn_id: str,
|
turn_id: str,
|
||||||
step_id: str,
|
step_id: str,
|
||||||
) -> AgentStepResponse: ...
|
) -> AgentStepResponse:
|
||||||
|
"""Retrieve an agent step by its ID.
|
||||||
|
|
||||||
|
:param agent_id: The ID of the agent to get the step for.
|
||||||
|
:param session_id: The ID of the session to get the step for.
|
||||||
|
:param turn_id: The ID of the turn to get the step for.
|
||||||
|
:param step_id: The ID of the step to get.
|
||||||
|
:returns: An AgentStepResponse.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
@webmethod(route="/agents/{agent_id}/session", method="POST")
|
@webmethod(route="/agents/{agent_id}/session", method="POST")
|
||||||
async def create_agent_session(
|
async def create_agent_session(
|
||||||
self,
|
self,
|
||||||
agent_id: str,
|
agent_id: str,
|
||||||
session_name: str,
|
session_name: str,
|
||||||
) -> AgentSessionCreateResponse: ...
|
) -> AgentSessionCreateResponse:
|
||||||
|
"""Create a new session for an agent.
|
||||||
|
|
||||||
|
:param agent_id: The ID of the agent to create the session for.
|
||||||
|
:param session_name: The name of the session to create.
|
||||||
|
:returns: An AgentSessionCreateResponse.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET")
|
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET")
|
||||||
async def get_agents_session(
|
async def get_agents_session(
|
||||||
|
@ -415,17 +508,35 @@ class Agents(Protocol):
|
||||||
session_id: str,
|
session_id: str,
|
||||||
agent_id: str,
|
agent_id: str,
|
||||||
turn_ids: Optional[List[str]] = None,
|
turn_ids: Optional[List[str]] = None,
|
||||||
) -> Session: ...
|
) -> Session:
|
||||||
|
"""Retrieve an agent session by its ID.
|
||||||
|
|
||||||
|
:param session_id: The ID of the session to get.
|
||||||
|
:param agent_id: The ID of the agent to get the session for.
|
||||||
|
:param turn_ids: (Optional) List of turn IDs to filter the session by.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE")
|
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE")
|
||||||
async def delete_agents_session(
|
async def delete_agents_session(
|
||||||
self,
|
self,
|
||||||
session_id: str,
|
session_id: str,
|
||||||
agent_id: str,
|
agent_id: str,
|
||||||
) -> None: ...
|
) -> None:
|
||||||
|
"""Delete an agent session by its ID.
|
||||||
|
|
||||||
|
:param session_id: The ID of the session to delete.
|
||||||
|
:param agent_id: The ID of the agent to delete the session for.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
@webmethod(route="/agents/{agent_id}", method="DELETE")
|
@webmethod(route="/agents/{agent_id}", method="DELETE")
|
||||||
async def delete_agent(
|
async def delete_agent(
|
||||||
self,
|
self,
|
||||||
agent_id: str,
|
agent_id: str,
|
||||||
) -> None: ...
|
) -> None:
|
||||||
|
"""Delete an agent by its ID.
|
||||||
|
|
||||||
|
:param agent_id: The ID of the agent to delete.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
|
@ -14,6 +14,14 @@ from llama_stack.schema_utils import json_schema_type, webmethod
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class PaginatedRowsResult(BaseModel):
|
class PaginatedRowsResult(BaseModel):
|
||||||
|
"""
|
||||||
|
A paginated list of rows from a dataset.
|
||||||
|
|
||||||
|
:param rows: The rows in the current page.
|
||||||
|
:param total_count: The total number of rows in the dataset.
|
||||||
|
:param next_page_token: The token to get the next page of rows.
|
||||||
|
"""
|
||||||
|
|
||||||
# the rows obey the DatasetSchema for the given dataset
|
# the rows obey the DatasetSchema for the given dataset
|
||||||
rows: List[Dict[str, Any]]
|
rows: List[Dict[str, Any]]
|
||||||
total_count: int
|
total_count: int
|
||||||
|
@ -36,7 +44,15 @@ class DatasetIO(Protocol):
|
||||||
rows_in_page: int,
|
rows_in_page: int,
|
||||||
page_token: Optional[str] = None,
|
page_token: Optional[str] = None,
|
||||||
filter_condition: Optional[str] = None,
|
filter_condition: Optional[str] = None,
|
||||||
) -> PaginatedRowsResult: ...
|
) -> PaginatedRowsResult:
|
||||||
|
"""Get a paginated list of rows from a dataset.
|
||||||
|
|
||||||
|
:param dataset_id: The ID of the dataset to get the rows from.
|
||||||
|
:param rows_in_page: The number of rows to get per page.
|
||||||
|
:param page_token: The token to get the next page of rows.
|
||||||
|
:param filter_condition: (Optional) A condition to filter the rows by.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
@webmethod(route="/datasetio/rows", method="POST")
|
@webmethod(route="/datasetio/rows", method="POST")
|
||||||
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
|
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
|
||||||
|
|
|
@ -19,6 +19,13 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ModelCandidate(BaseModel):
|
class ModelCandidate(BaseModel):
|
||||||
|
"""A model candidate for evaluation.
|
||||||
|
|
||||||
|
:param model: The model ID to evaluate.
|
||||||
|
:param sampling_params: The sampling parameters for the model.
|
||||||
|
:param system_message: (Optional) The system message providing instructions or context to the model.
|
||||||
|
"""
|
||||||
|
|
||||||
type: Literal["model"] = "model"
|
type: Literal["model"] = "model"
|
||||||
model: str
|
model: str
|
||||||
sampling_params: SamplingParams
|
sampling_params: SamplingParams
|
||||||
|
@ -27,6 +34,11 @@ class ModelCandidate(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AgentCandidate(BaseModel):
|
class AgentCandidate(BaseModel):
|
||||||
|
"""An agent candidate for evaluation.
|
||||||
|
|
||||||
|
:param config: The configuration for the agent candidate.
|
||||||
|
"""
|
||||||
|
|
||||||
type: Literal["agent"] = "agent"
|
type: Literal["agent"] = "agent"
|
||||||
config: AgentConfig
|
config: AgentConfig
|
||||||
|
|
||||||
|
@ -39,6 +51,13 @@ EvalCandidate = register_schema(
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class BenchmarkConfig(BaseModel):
|
class BenchmarkConfig(BaseModel):
|
||||||
|
"""A benchmark configuration for evaluation.
|
||||||
|
|
||||||
|
:param eval_candidate: The candidate to evaluate.
|
||||||
|
:param scoring_params: Map between scoring function id and parameters for each scoring function you want to run
|
||||||
|
:param num_examples: (Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated
|
||||||
|
"""
|
||||||
|
|
||||||
eval_candidate: EvalCandidate
|
eval_candidate: EvalCandidate
|
||||||
scoring_params: Dict[str, ScoringFnParams] = Field(
|
scoring_params: Dict[str, ScoringFnParams] = Field(
|
||||||
description="Map between scoring function id and parameters for each scoring function you want to run",
|
description="Map between scoring function id and parameters for each scoring function you want to run",
|
||||||
|
@ -53,18 +72,32 @@ class BenchmarkConfig(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class EvaluateResponse(BaseModel):
|
class EvaluateResponse(BaseModel):
|
||||||
|
"""The response from an evaluation.
|
||||||
|
|
||||||
|
:param generations: The generations from the evaluation.
|
||||||
|
:param scores: The scores from the evaluation.
|
||||||
|
"""
|
||||||
|
|
||||||
generations: List[Dict[str, Any]]
|
generations: List[Dict[str, Any]]
|
||||||
# each key in the dict is a scoring function name
|
# each key in the dict is a scoring function name
|
||||||
scores: Dict[str, ScoringResult]
|
scores: Dict[str, ScoringResult]
|
||||||
|
|
||||||
|
|
||||||
class Eval(Protocol):
|
class Eval(Protocol):
|
||||||
|
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
|
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
|
||||||
async def run_eval(
|
async def run_eval(
|
||||||
self,
|
self,
|
||||||
benchmark_id: str,
|
benchmark_id: str,
|
||||||
benchmark_config: BenchmarkConfig,
|
benchmark_config: BenchmarkConfig,
|
||||||
) -> Job: ...
|
) -> Job:
|
||||||
|
"""Run an evaluation on a benchmark.
|
||||||
|
|
||||||
|
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||||
|
:param benchmark_config: The configuration for the benchmark.
|
||||||
|
:return: The job that was created to run the evaluation.
|
||||||
|
"""
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
|
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
|
||||||
async def evaluate_rows(
|
async def evaluate_rows(
|
||||||
|
@ -73,13 +106,40 @@ class Eval(Protocol):
|
||||||
input_rows: List[Dict[str, Any]],
|
input_rows: List[Dict[str, Any]],
|
||||||
scoring_functions: List[str],
|
scoring_functions: List[str],
|
||||||
benchmark_config: BenchmarkConfig,
|
benchmark_config: BenchmarkConfig,
|
||||||
) -> EvaluateResponse: ...
|
) -> EvaluateResponse:
|
||||||
|
"""Evaluate a list of rows on a benchmark.
|
||||||
|
|
||||||
|
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||||
|
:param input_rows: The rows to evaluate.
|
||||||
|
:param scoring_functions: The scoring functions to use for the evaluation.
|
||||||
|
:param benchmark_config: The configuration for the benchmark.
|
||||||
|
:return: EvaluateResponse object containing generations and scores
|
||||||
|
"""
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
|
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
|
||||||
async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]: ...
|
async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]:
|
||||||
|
"""Get the status of a job.
|
||||||
|
|
||||||
|
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||||
|
:param job_id: The ID of the job to get the status of.
|
||||||
|
:return: The status of the evaluation job.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
|
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
|
||||||
async def job_cancel(self, benchmark_id: str, job_id: str) -> None: ...
|
async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
|
||||||
|
"""Cancel a job.
|
||||||
|
|
||||||
|
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||||
|
:param job_id: The ID of the job to cancel.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
|
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
|
||||||
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: ...
|
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
|
||||||
|
"""Get the result of a job.
|
||||||
|
|
||||||
|
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||||
|
:param job_id: The ID of the job to get the result of.
|
||||||
|
:return: The result of the job.
|
||||||
|
"""
|
||||||
|
|
|
@ -17,6 +17,13 @@ ScoringResultRow = Dict[str, Any]
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ScoringResult(BaseModel):
|
class ScoringResult(BaseModel):
|
||||||
|
"""
|
||||||
|
A scoring result for a single scoring function, covering every scored row.
|
||||||
|
|
||||||
|
:param score_rows: The scoring result for each row. Each row is a map of column name to value.
|
||||||
|
:param aggregated_results: Map of metric name to aggregated value
|
||||||
|
"""
|
||||||
|
|
||||||
score_rows: List[ScoringResultRow]
|
score_rows: List[ScoringResultRow]
|
||||||
# aggregated metrics to value
|
# aggregated metrics to value
|
||||||
aggregated_results: Dict[str, Any]
|
aggregated_results: Dict[str, Any]
|
||||||
|
@ -30,6 +37,12 @@ class ScoreBatchResponse(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ScoreResponse(BaseModel):
|
class ScoreResponse(BaseModel):
|
||||||
|
"""
|
||||||
|
The response from scoring.
|
||||||
|
|
||||||
|
:param results: A map of scoring function name to ScoringResult.
|
||||||
|
"""
|
||||||
|
|
||||||
# each key in the dict is a scoring function name
|
# each key in the dict is a scoring function name
|
||||||
results: Dict[str, ScoringResult]
|
results: Dict[str, ScoringResult]
|
||||||
|
|
||||||
|
@ -55,4 +68,11 @@ class Scoring(Protocol):
|
||||||
self,
|
self,
|
||||||
input_rows: List[Dict[str, Any]],
|
input_rows: List[Dict[str, Any]],
|
||||||
scoring_functions: Dict[str, Optional[ScoringFnParams]],
|
scoring_functions: Dict[str, Optional[ScoringFnParams]],
|
||||||
) -> ScoreResponse: ...
|
) -> ScoreResponse:
|
||||||
|
"""Score a list of rows.
|
||||||
|
|
||||||
|
:param input_rows: The rows to score.
|
||||||
|
:param scoring_functions: The scoring functions to use for the scoring.
|
||||||
|
:return: ScoreResponse object containing rows and aggregated results
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue