mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 02:53:30 +00:00
docs: api documentation for agents/eval/scoring/datasets (#1400)
# What does this PR do? - add some docs to OpenAPI for agents/eval/scoring/datasetio [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan - read [//]: # (## Documentation)
This commit is contained in:
parent
0d18274d34
commit
3d9331840e
6 changed files with 586 additions and 137 deletions
291
docs/_static/llama-stack-spec.html
vendored
291
docs/_static/llama-stack-spec.html
vendored
|
@ -69,11 +69,12 @@
|
|||
"tags": [
|
||||
"DatasetIO"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Get a paginated list of rows from a dataset.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "dataset_id",
|
||||
"in": "query",
|
||||
"description": "The ID of the dataset to get the rows from.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -82,6 +83,7 @@
|
|||
{
|
||||
"name": "rows_in_page",
|
||||
"in": "query",
|
||||
"description": "The number of rows to get per page.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "integer"
|
||||
|
@ -90,6 +92,7 @@
|
|||
{
|
||||
"name": "page_token",
|
||||
"in": "query",
|
||||
"description": "The token to get the next page of rows.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -98,6 +101,7 @@
|
|||
{
|
||||
"name": "filter_condition",
|
||||
"in": "query",
|
||||
"description": "(Optional) A condition to filter the rows by.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -362,7 +366,7 @@
|
|||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"description": "An AgentCreateResponse with the agent ID.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
|
@ -387,7 +391,7 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Create an agent with the given configuration.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
@ -405,7 +409,7 @@
|
|||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"description": "An AgentSessionCreateResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
|
@ -430,11 +434,12 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Create a new session for an agent.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "agent_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the agent to create the session for.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -457,7 +462,7 @@
|
|||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
|
||||
"description": "If stream=False, returns a Turn object. If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk objects.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
|
@ -487,11 +492,12 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Create a new turn for an agent.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "agent_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the agent to create the turn for.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -500,6 +506,7 @@
|
|||
{
|
||||
"name": "session_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the session to create the turn for.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -623,11 +630,12 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Delete an agent by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "agent_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the agent to delete.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -665,11 +673,12 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Retrieve an agent session by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "session_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the session to get.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -678,6 +687,7 @@
|
|||
{
|
||||
"name": "agent_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the agent to get the session for.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -686,6 +696,7 @@
|
|||
{
|
||||
"name": "turn_ids",
|
||||
"in": "query",
|
||||
"description": "(Optional) List of turn IDs to filter the session by.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "array",
|
||||
|
@ -717,11 +728,12 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Delete an agent session by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "session_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the session to delete.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -730,6 +742,7 @@
|
|||
{
|
||||
"name": "agent_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the agent to delete the session for.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -887,7 +900,7 @@
|
|||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"description": "EvaluateResponse object containing generations and scores",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
|
@ -912,11 +925,12 @@
|
|||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Evaluate a list of rows on a benchmark.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -939,7 +953,7 @@
|
|||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"description": "An AgentStepResponse.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
|
@ -964,11 +978,12 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Retrieve an agent step by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "agent_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the agent to get the step for.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -977,6 +992,7 @@
|
|||
{
|
||||
"name": "session_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the session to get the step for.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -985,6 +1001,7 @@
|
|||
{
|
||||
"name": "turn_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the turn to get the step for.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -993,6 +1010,7 @@
|
|||
{
|
||||
"name": "step_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the step to get.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -1005,7 +1023,7 @@
|
|||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"description": "A Turn.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
|
@ -1030,11 +1048,12 @@
|
|||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Retrieve an agent turn by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "agent_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the agent to get the turn for.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -1043,6 +1062,7 @@
|
|||
{
|
||||
"name": "session_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the session to get the turn for.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -1051,6 +1071,7 @@
|
|||
{
|
||||
"name": "turn_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the turn to get.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -2105,7 +2126,7 @@
|
|||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"description": "The status of the evaluation job.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
|
@ -2137,11 +2158,12 @@
|
|||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Get the status of a job.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -2150,6 +2172,7 @@
|
|||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the job to get the status of.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -2178,11 +2201,12 @@
|
|||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Cancel a job.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -2191,6 +2215,7 @@
|
|||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the job to cancel.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -2203,7 +2228,7 @@
|
|||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"description": "The result of the job.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
|
@ -2228,11 +2253,12 @@
|
|||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Get the result of a job.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -2241,6 +2267,7 @@
|
|||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the job to get the result of.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -3271,7 +3298,7 @@
|
|||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"description": "The job that was created to run the evaluation.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
|
@ -3296,11 +3323,12 @@
|
|||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Run an evaluation on a benchmark.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
|
@ -3402,7 +3430,7 @@
|
|||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"description": "ScoreResponse object containing rows and aggregated results",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
|
@ -3427,7 +3455,7 @@
|
|||
"tags": [
|
||||
"Scoring"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Score a list of rows.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
@ -5192,7 +5220,8 @@
|
|||
"type": "object",
|
||||
"properties": {
|
||||
"agent_config": {
|
||||
"$ref": "#/components/schemas/AgentConfig"
|
||||
"$ref": "#/components/schemas/AgentConfig",
|
||||
"description": "The configuration for the agent."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -5218,7 +5247,8 @@
|
|||
"type": "object",
|
||||
"properties": {
|
||||
"session_name": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The name of the session to create."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -5254,10 +5284,12 @@
|
|||
"$ref": "#/components/schemas/ToolResponseMessage"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "List of messages to start the turn with."
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean"
|
||||
"type": "boolean",
|
||||
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
|
||||
},
|
||||
"documents": {
|
||||
"type": "array",
|
||||
|
@ -5281,10 +5313,12 @@
|
|||
{
|
||||
"$ref": "#/components/schemas/URL"
|
||||
}
|
||||
]
|
||||
],
|
||||
"description": "The content of the document."
|
||||
},
|
||||
"mime_type": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The MIME type of the document."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -5292,17 +5326,21 @@
|
|||
"content",
|
||||
"mime_type"
|
||||
],
|
||||
"title": "Document"
|
||||
}
|
||||
"title": "Document",
|
||||
"description": "A document to be used by an agent."
|
||||
},
|
||||
"description": "(Optional) List of documents to create the turn with."
|
||||
},
|
||||
"toolgroups": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/AgentTool"
|
||||
}
|
||||
},
|
||||
"description": "(Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request."
|
||||
},
|
||||
"tool_config": {
|
||||
"$ref": "#/components/schemas/ToolConfig"
|
||||
"$ref": "#/components/schemas/ToolConfig",
|
||||
"description": "(Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -5315,18 +5353,22 @@
|
|||
"type": "object",
|
||||
"properties": {
|
||||
"turn_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The ID of the turn."
|
||||
},
|
||||
"step_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The ID of the step."
|
||||
},
|
||||
"started_at": {
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
"format": "date-time",
|
||||
"description": "The time the step started."
|
||||
},
|
||||
"completed_at": {
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
"format": "date-time",
|
||||
"description": "The time the step completed."
|
||||
},
|
||||
"step_type": {
|
||||
"type": "string",
|
||||
|
@ -5334,7 +5376,8 @@
|
|||
"default": "inference"
|
||||
},
|
||||
"model_response": {
|
||||
"$ref": "#/components/schemas/CompletionMessage"
|
||||
"$ref": "#/components/schemas/CompletionMessage",
|
||||
"description": "The response from the LLM."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -5344,24 +5387,29 @@
|
|||
"step_type",
|
||||
"model_response"
|
||||
],
|
||||
"title": "InferenceStep"
|
||||
"title": "InferenceStep",
|
||||
"description": "An inference step in an agent turn."
|
||||
},
|
||||
"MemoryRetrievalStep": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"turn_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The ID of the turn."
|
||||
},
|
||||
"step_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The ID of the step."
|
||||
},
|
||||
"started_at": {
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
"format": "date-time",
|
||||
"description": "The time the step started."
|
||||
},
|
||||
"completed_at": {
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
"format": "date-time",
|
||||
"description": "The time the step completed."
|
||||
},
|
||||
"step_type": {
|
||||
"type": "string",
|
||||
|
@ -5369,10 +5417,12 @@
|
|||
"default": "memory_retrieval"
|
||||
},
|
||||
"vector_db_ids": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The IDs of the vector databases to retrieve context from."
|
||||
},
|
||||
"inserted_context": {
|
||||
"$ref": "#/components/schemas/InterleavedContent"
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"description": "The context retrieved from the vector databases."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -5383,7 +5433,8 @@
|
|||
"vector_db_ids",
|
||||
"inserted_context"
|
||||
],
|
||||
"title": "MemoryRetrievalStep"
|
||||
"title": "MemoryRetrievalStep",
|
||||
"description": "A memory retrieval step in an agent turn."
|
||||
},
|
||||
"SafetyViolation": {
|
||||
"type": "object",
|
||||
|
@ -5431,18 +5482,22 @@
|
|||
"type": "object",
|
||||
"properties": {
|
||||
"turn_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The ID of the turn."
|
||||
},
|
||||
"step_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The ID of the step."
|
||||
},
|
||||
"started_at": {
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
"format": "date-time",
|
||||
"description": "The time the step started."
|
||||
},
|
||||
"completed_at": {
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
"format": "date-time",
|
||||
"description": "The time the step completed."
|
||||
},
|
||||
"step_type": {
|
||||
"type": "string",
|
||||
|
@ -5450,7 +5505,8 @@
|
|||
"default": "shield_call"
|
||||
},
|
||||
"violation": {
|
||||
"$ref": "#/components/schemas/SafetyViolation"
|
||||
"$ref": "#/components/schemas/SafetyViolation",
|
||||
"description": "The violation from the shield call."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -5459,24 +5515,29 @@
|
|||
"step_id",
|
||||
"step_type"
|
||||
],
|
||||
"title": "ShieldCallStep"
|
||||
"title": "ShieldCallStep",
|
||||
"description": "A shield call step in an agent turn."
|
||||
},
|
||||
"ToolExecutionStep": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"turn_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The ID of the turn."
|
||||
},
|
||||
"step_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The ID of the step."
|
||||
},
|
||||
"started_at": {
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
"format": "date-time",
|
||||
"description": "The time the step started."
|
||||
},
|
||||
"completed_at": {
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
"format": "date-time",
|
||||
"description": "The time the step completed."
|
||||
},
|
||||
"step_type": {
|
||||
"type": "string",
|
||||
|
@ -5487,13 +5548,15 @@
|
|||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ToolCall"
|
||||
}
|
||||
},
|
||||
"description": "The tool calls to execute."
|
||||
},
|
||||
"tool_responses": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ToolResponse"
|
||||
}
|
||||
},
|
||||
"description": "The tool responses from the tool calls."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -5504,7 +5567,8 @@
|
|||
"tool_calls",
|
||||
"tool_responses"
|
||||
],
|
||||
"title": "ToolExecutionStep"
|
||||
"title": "ToolExecutionStep",
|
||||
"description": "A tool execution step in an agent turn."
|
||||
},
|
||||
"ToolResponse": {
|
||||
"type": "object",
|
||||
|
@ -5641,10 +5705,12 @@
|
|||
{
|
||||
"$ref": "#/components/schemas/URL"
|
||||
}
|
||||
]
|
||||
],
|
||||
"description": "The content of the attachment."
|
||||
},
|
||||
"mime_type": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The MIME type of the attachment."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -5652,7 +5718,8 @@
|
|||
"content",
|
||||
"mime_type"
|
||||
],
|
||||
"title": "Attachment"
|
||||
"title": "Attachment",
|
||||
"description": "An attachment to an agent turn."
|
||||
}
|
||||
},
|
||||
"started_at": {
|
||||
|
@ -5747,7 +5814,8 @@
|
|||
"shield_call",
|
||||
"memory_retrieval"
|
||||
],
|
||||
"title": "StepType"
|
||||
"title": "StepType",
|
||||
"description": "Type of the step in an agent turn."
|
||||
},
|
||||
"step_id": {
|
||||
"type": "string"
|
||||
|
@ -5803,7 +5871,8 @@
|
|||
"shield_call",
|
||||
"memory_retrieval"
|
||||
],
|
||||
"title": "StepType"
|
||||
"title": "StepType",
|
||||
"description": "Type of the step in an agent turn."
|
||||
},
|
||||
"step_id": {
|
||||
"type": "string"
|
||||
|
@ -5837,7 +5906,8 @@
|
|||
"shield_call",
|
||||
"memory_retrieval"
|
||||
],
|
||||
"title": "StepType"
|
||||
"title": "StepType",
|
||||
"description": "Type of the step in an agent turn."
|
||||
},
|
||||
"step_id": {
|
||||
"type": "string"
|
||||
|
@ -6129,7 +6199,8 @@
|
|||
"default": "agent"
|
||||
},
|
||||
"config": {
|
||||
"$ref": "#/components/schemas/AgentConfig"
|
||||
"$ref": "#/components/schemas/AgentConfig",
|
||||
"description": "The configuration for the agent candidate."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -6137,7 +6208,8 @@
|
|||
"type",
|
||||
"config"
|
||||
],
|
||||
"title": "AgentCandidate"
|
||||
"title": "AgentCandidate",
|
||||
"description": "An agent candidate for evaluation."
|
||||
},
|
||||
"AggregationFunctionType": {
|
||||
"type": "string",
|
||||
|
@ -6174,16 +6246,19 @@
|
|||
"type": "object",
|
||||
"properties": {
|
||||
"eval_candidate": {
|
||||
"$ref": "#/components/schemas/EvalCandidate"
|
||||
"$ref": "#/components/schemas/EvalCandidate",
|
||||
"description": "The candidate to evaluate."
|
||||
},
|
||||
"scoring_params": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
}
|
||||
},
|
||||
"description": "Map between scoring function id and parameters for each scoring function you want to run"
|
||||
},
|
||||
"num_examples": {
|
||||
"type": "integer"
|
||||
"type": "integer",
|
||||
"description": "(Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -6191,7 +6266,8 @@
|
|||
"eval_candidate",
|
||||
"scoring_params"
|
||||
],
|
||||
"title": "BenchmarkConfig"
|
||||
"title": "BenchmarkConfig",
|
||||
"description": "A benchmark configuration for evaluation."
|
||||
},
|
||||
"EvalCandidate": {
|
||||
"oneOf": [
|
||||
|
@ -6253,13 +6329,16 @@
|
|||
"default": "model"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The model ID to evaluate."
|
||||
},
|
||||
"sampling_params": {
|
||||
"$ref": "#/components/schemas/SamplingParams"
|
||||
"$ref": "#/components/schemas/SamplingParams",
|
||||
"description": "The sampling parameters for the model."
|
||||
},
|
||||
"system_message": {
|
||||
"$ref": "#/components/schemas/SystemMessage"
|
||||
"$ref": "#/components/schemas/SystemMessage",
|
||||
"description": "(Optional) The system message providing instructions or context to the model."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -6268,7 +6347,8 @@
|
|||
"model",
|
||||
"sampling_params"
|
||||
],
|
||||
"title": "ModelCandidate"
|
||||
"title": "ModelCandidate",
|
||||
"description": "A model candidate for evaluation."
|
||||
},
|
||||
"RegexParserScoringFnParams": {
|
||||
"type": "object",
|
||||
|
@ -6347,16 +6427,19 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "The rows to evaluate."
|
||||
},
|
||||
"scoring_functions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"description": "The scoring functions to use for the evaluation."
|
||||
},
|
||||
"benchmark_config": {
|
||||
"$ref": "#/components/schemas/BenchmarkConfig"
|
||||
"$ref": "#/components/schemas/BenchmarkConfig",
|
||||
"description": "The configuration for the benchmark."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -6396,13 +6479,15 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "The generations from the evaluation."
|
||||
},
|
||||
"scores": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringResult"
|
||||
}
|
||||
},
|
||||
"description": "The scores from the evaluation."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -6410,7 +6495,8 @@
|
|||
"generations",
|
||||
"scores"
|
||||
],
|
||||
"title": "EvaluateResponse"
|
||||
"title": "EvaluateResponse",
|
||||
"description": "The response from an evaluation."
|
||||
},
|
||||
"ScoringResult": {
|
||||
"type": "object",
|
||||
|
@ -6441,7 +6527,8 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "The scoring result for each row. Each row is a map of column name to value."
|
||||
},
|
||||
"aggregated_results": {
|
||||
"type": "object",
|
||||
|
@ -6466,7 +6553,8 @@
|
|||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "Map of metric name to aggregated value"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -6474,7 +6562,8 @@
|
|||
"score_rows",
|
||||
"aggregated_results"
|
||||
],
|
||||
"title": "ScoringResult"
|
||||
"title": "ScoringResult",
|
||||
"description": "A scoring result for a single scoring function, containing the per-row scores and the aggregated results."
|
||||
},
|
||||
"Session": {
|
||||
"type": "object",
|
||||
|
@ -6963,13 +7052,16 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "The rows in the current page."
|
||||
},
|
||||
"total_count": {
|
||||
"type": "integer"
|
||||
"type": "integer",
|
||||
"description": "The total number of rows in the dataset."
|
||||
},
|
||||
"next_page_token": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The token to get the next page of rows."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -6977,7 +7069,8 @@
|
|||
"rows",
|
||||
"total_count"
|
||||
],
|
||||
"title": "PaginatedRowsResult"
|
||||
"title": "PaginatedRowsResult",
|
||||
"description": "A paginated list of rows from a dataset."
|
||||
},
|
||||
"ScoringFn": {
|
||||
"type": "object",
|
||||
|
@ -9249,7 +9342,8 @@
|
|||
"type": "object",
|
||||
"properties": {
|
||||
"benchmark_config": {
|
||||
"$ref": "#/components/schemas/BenchmarkConfig"
|
||||
"$ref": "#/components/schemas/BenchmarkConfig",
|
||||
"description": "The configuration for the benchmark."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -9386,7 +9480,8 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "The rows to score."
|
||||
},
|
||||
"scoring_functions": {
|
||||
"type": "object",
|
||||
|
@ -9399,7 +9494,8 @@
|
|||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The scoring functions to use for the scoring."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -9416,14 +9512,16 @@
|
|||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringResult"
|
||||
}
|
||||
},
|
||||
"description": "A map of scoring function name to ScoringResult."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"results"
|
||||
],
|
||||
"title": "ScoreResponse"
|
||||
"title": "ScoreResponse",
|
||||
"description": "The response from scoring."
|
||||
},
|
||||
"ScoreBatchRequest": {
|
||||
"type": "object",
|
||||
|
@ -9838,7 +9936,8 @@
|
|||
"name": "Datasets"
|
||||
},
|
||||
{
|
||||
"name": "Eval"
|
||||
"name": "Eval",
|
||||
"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
||||
},
|
||||
{
|
||||
"name": "Files (Coming Soon)"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue