diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 643e1faee..68f27ef3b 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -69,11 +69,12 @@
"tags": [
"DatasetIO"
],
- "description": "",
+ "description": "Get a paginated list of rows from a dataset.",
"parameters": [
{
"name": "dataset_id",
"in": "query",
+ "description": "The ID of the dataset to get the rows from.",
"required": true,
"schema": {
"type": "string"
@@ -82,6 +83,7 @@
{
"name": "rows_in_page",
"in": "query",
+ "description": "The number of rows to get per page.",
"required": true,
"schema": {
"type": "integer"
@@ -90,6 +92,7 @@
{
"name": "page_token",
"in": "query",
+ "description": "The token to get the next page of rows.",
"required": false,
"schema": {
"type": "string"
@@ -98,6 +101,7 @@
{
"name": "filter_condition",
"in": "query",
+ "description": "(Optional) A condition to filter the rows by.",
"required": false,
"schema": {
"type": "string"
@@ -362,7 +366,7 @@
"post": {
"responses": {
"200": {
- "description": "OK",
+ "description": "An AgentCreateResponse with the agent ID.",
"content": {
"application/json": {
"schema": {
@@ -387,7 +391,7 @@
"tags": [
"Agents"
],
- "description": "",
+ "description": "Create an agent with the given configuration.",
"parameters": [],
"requestBody": {
"content": {
@@ -405,7 +409,7 @@
"post": {
"responses": {
"200": {
- "description": "OK",
+ "description": "An AgentSessionCreateResponse.",
"content": {
"application/json": {
"schema": {
@@ -430,11 +434,12 @@
"tags": [
"Agents"
],
- "description": "",
+ "description": "Create a new session for an agent.",
"parameters": [
{
"name": "agent_id",
"in": "path",
+ "description": "The ID of the agent to create the session for.",
"required": true,
"schema": {
"type": "string"
@@ -457,7 +462,7 @@
"post": {
"responses": {
"200": {
- "description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
+ "description": "If stream=False, returns a Turn object. If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk",
"content": {
"application/json": {
"schema": {
@@ -487,11 +492,12 @@
"tags": [
"Agents"
],
- "description": "",
+ "description": "Create a new turn for an agent.",
"parameters": [
{
"name": "agent_id",
"in": "path",
+ "description": "The ID of the agent to create the turn for.",
"required": true,
"schema": {
"type": "string"
@@ -500,6 +506,7 @@
{
"name": "session_id",
"in": "path",
+ "description": "The ID of the session to create the turn for.",
"required": true,
"schema": {
"type": "string"
@@ -623,11 +630,12 @@
"tags": [
"Agents"
],
- "description": "",
+ "description": "Delete an agent by its ID.",
"parameters": [
{
"name": "agent_id",
"in": "path",
+ "description": "The ID of the agent to delete.",
"required": true,
"schema": {
"type": "string"
@@ -665,11 +673,12 @@
"tags": [
"Agents"
],
- "description": "",
+ "description": "Retrieve an agent session by its ID.",
"parameters": [
{
"name": "session_id",
"in": "path",
+ "description": "The ID of the session to get.",
"required": true,
"schema": {
"type": "string"
@@ -678,6 +687,7 @@
{
"name": "agent_id",
"in": "path",
+ "description": "The ID of the agent to get the session for.",
"required": true,
"schema": {
"type": "string"
@@ -686,6 +696,7 @@
{
"name": "turn_ids",
"in": "query",
+ "description": "(Optional) List of turn IDs to filter the session by.",
"required": false,
"schema": {
"type": "array",
@@ -717,11 +728,12 @@
"tags": [
"Agents"
],
- "description": "",
+ "description": "Delete an agent session by its ID.",
"parameters": [
{
"name": "session_id",
"in": "path",
+ "description": "The ID of the session to delete.",
"required": true,
"schema": {
"type": "string"
@@ -730,6 +742,7 @@
{
"name": "agent_id",
"in": "path",
+ "description": "The ID of the agent to delete the session for.",
"required": true,
"schema": {
"type": "string"
@@ -887,7 +900,7 @@
"post": {
"responses": {
"200": {
- "description": "OK",
+ "description": "EvaluateResponse object containing generations and scores",
"content": {
"application/json": {
"schema": {
@@ -912,11 +925,12 @@
"tags": [
"Eval"
],
- "description": "",
+ "description": "Evaluate a list of rows on a benchmark.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
+ "description": "The ID of the benchmark to run the evaluation on.",
"required": true,
"schema": {
"type": "string"
@@ -939,7 +953,7 @@
"get": {
"responses": {
"200": {
- "description": "OK",
+ "description": "An AgentStepResponse.",
"content": {
"application/json": {
"schema": {
@@ -964,11 +978,12 @@
"tags": [
"Agents"
],
- "description": "",
+ "description": "Retrieve an agent step by its ID.",
"parameters": [
{
"name": "agent_id",
"in": "path",
+ "description": "The ID of the agent to get the step for.",
"required": true,
"schema": {
"type": "string"
@@ -977,6 +992,7 @@
{
"name": "session_id",
"in": "path",
+ "description": "The ID of the session to get the step for.",
"required": true,
"schema": {
"type": "string"
@@ -985,6 +1001,7 @@
{
"name": "turn_id",
"in": "path",
+ "description": "The ID of the turn to get the step for.",
"required": true,
"schema": {
"type": "string"
@@ -993,6 +1010,7 @@
{
"name": "step_id",
"in": "path",
+ "description": "The ID of the step to get.",
"required": true,
"schema": {
"type": "string"
@@ -1005,7 +1023,7 @@
"get": {
"responses": {
"200": {
- "description": "OK",
+ "description": "A Turn.",
"content": {
"application/json": {
"schema": {
@@ -1030,11 +1048,12 @@
"tags": [
"Agents"
],
- "description": "",
+ "description": "Retrieve an agent turn by its ID.",
"parameters": [
{
"name": "agent_id",
"in": "path",
+ "description": "The ID of the agent to get the turn for.",
"required": true,
"schema": {
"type": "string"
@@ -1043,6 +1062,7 @@
{
"name": "session_id",
"in": "path",
+ "description": "The ID of the session to get the turn for.",
"required": true,
"schema": {
"type": "string"
@@ -1051,6 +1071,7 @@
{
"name": "turn_id",
"in": "path",
+ "description": "The ID of the turn to get.",
"required": true,
"schema": {
"type": "string"
@@ -2105,7 +2126,7 @@
"get": {
"responses": {
"200": {
- "description": "OK",
+ "description": "The status of the evaluation job.",
"content": {
"application/json": {
"schema": {
@@ -2137,11 +2158,12 @@
"tags": [
"Eval"
],
- "description": "",
+ "description": "Get the status of a job.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
+ "description": "The ID of the benchmark to run the evaluation on.",
"required": true,
"schema": {
"type": "string"
@@ -2150,6 +2172,7 @@
{
"name": "job_id",
"in": "path",
+ "description": "The ID of the job to get the status of.",
"required": true,
"schema": {
"type": "string"
@@ -2178,11 +2201,12 @@
"tags": [
"Eval"
],
- "description": "",
+ "description": "Cancel a job.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
+ "description": "The ID of the benchmark to run the evaluation on.",
"required": true,
"schema": {
"type": "string"
@@ -2191,6 +2215,7 @@
{
"name": "job_id",
"in": "path",
+ "description": "The ID of the job to cancel.",
"required": true,
"schema": {
"type": "string"
@@ -2203,7 +2228,7 @@
"get": {
"responses": {
"200": {
- "description": "OK",
+ "description": "The result of the job.",
"content": {
"application/json": {
"schema": {
@@ -2228,11 +2253,12 @@
"tags": [
"Eval"
],
- "description": "",
+ "description": "Get the result of a job.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
+ "description": "The ID of the benchmark to run the evaluation on.",
"required": true,
"schema": {
"type": "string"
@@ -2241,6 +2267,7 @@
{
"name": "job_id",
"in": "path",
+ "description": "The ID of the job to get the result of.",
"required": true,
"schema": {
"type": "string"
@@ -3271,7 +3298,7 @@
"post": {
"responses": {
"200": {
- "description": "OK",
+ "description": "The job that was created to run the evaluation.",
"content": {
"application/json": {
"schema": {
@@ -3296,11 +3323,12 @@
"tags": [
"Eval"
],
- "description": "",
+ "description": "Run an evaluation on a benchmark.",
"parameters": [
{
"name": "benchmark_id",
"in": "path",
+ "description": "The ID of the benchmark to run the evaluation on.",
"required": true,
"schema": {
"type": "string"
@@ -3402,7 +3430,7 @@
"post": {
"responses": {
"200": {
- "description": "OK",
+ "description": "ScoreResponse object containing rows and aggregated results",
"content": {
"application/json": {
"schema": {
@@ -3427,7 +3455,7 @@
"tags": [
"Scoring"
],
- "description": "",
+ "description": "Score a list of rows.",
"parameters": [],
"requestBody": {
"content": {
@@ -5192,7 +5220,8 @@
"type": "object",
"properties": {
"agent_config": {
- "$ref": "#/components/schemas/AgentConfig"
+ "$ref": "#/components/schemas/AgentConfig",
+ "description": "The configuration for the agent."
}
},
"additionalProperties": false,
@@ -5218,7 +5247,8 @@
"type": "object",
"properties": {
"session_name": {
- "type": "string"
+ "type": "string",
+ "description": "The name of the session to create."
}
},
"additionalProperties": false,
@@ -5254,10 +5284,12 @@
"$ref": "#/components/schemas/ToolResponseMessage"
}
]
- }
+ },
+ "description": "List of messages to start the turn with."
},
"stream": {
- "type": "boolean"
+ "type": "boolean",
+ "description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
},
"documents": {
"type": "array",
@@ -5281,10 +5313,12 @@
{
"$ref": "#/components/schemas/URL"
}
- ]
+ ],
+ "description": "The content of the document."
},
"mime_type": {
- "type": "string"
+ "type": "string",
+ "description": "The MIME type of the document."
}
},
"additionalProperties": false,
@@ -5292,17 +5326,21 @@
"content",
"mime_type"
],
- "title": "Document"
- }
+ "title": "Document",
+ "description": "A document to be used by an agent."
+ },
+ "description": "(Optional) List of documents to create the turn with."
},
"toolgroups": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AgentTool"
- }
+ },
+ "description": "(Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request."
},
"tool_config": {
- "$ref": "#/components/schemas/ToolConfig"
+ "$ref": "#/components/schemas/ToolConfig",
+ "description": "(Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config."
}
},
"additionalProperties": false,
@@ -5315,18 +5353,22 @@
"type": "object",
"properties": {
"turn_id": {
- "type": "string"
+ "type": "string",
+ "description": "The ID of the turn."
},
"step_id": {
- "type": "string"
+ "type": "string",
+ "description": "The ID of the step."
},
"started_at": {
"type": "string",
- "format": "date-time"
+ "format": "date-time",
+ "description": "The time the step started."
},
"completed_at": {
"type": "string",
- "format": "date-time"
+ "format": "date-time",
+ "description": "The time the step completed."
},
"step_type": {
"type": "string",
@@ -5334,7 +5376,8 @@
"default": "inference"
},
"model_response": {
- "$ref": "#/components/schemas/CompletionMessage"
+ "$ref": "#/components/schemas/CompletionMessage",
+ "description": "The response from the LLM."
}
},
"additionalProperties": false,
@@ -5344,24 +5387,29 @@
"step_type",
"model_response"
],
- "title": "InferenceStep"
+ "title": "InferenceStep",
+ "description": "An inference step in an agent turn."
},
"MemoryRetrievalStep": {
"type": "object",
"properties": {
"turn_id": {
- "type": "string"
+ "type": "string",
+ "description": "The ID of the turn."
},
"step_id": {
- "type": "string"
+ "type": "string",
+ "description": "The ID of the step."
},
"started_at": {
"type": "string",
- "format": "date-time"
+ "format": "date-time",
+ "description": "The time the step started."
},
"completed_at": {
"type": "string",
- "format": "date-time"
+ "format": "date-time",
+ "description": "The time the step completed."
},
"step_type": {
"type": "string",
@@ -5369,10 +5417,12 @@
"default": "memory_retrieval"
},
"vector_db_ids": {
- "type": "string"
+ "type": "string",
+ "description": "The IDs of the vector databases to retrieve context from."
},
"inserted_context": {
- "$ref": "#/components/schemas/InterleavedContent"
+ "$ref": "#/components/schemas/InterleavedContent",
+ "description": "The context retrieved from the vector databases."
}
},
"additionalProperties": false,
@@ -5383,7 +5433,8 @@
"vector_db_ids",
"inserted_context"
],
- "title": "MemoryRetrievalStep"
+ "title": "MemoryRetrievalStep",
+ "description": "A memory retrieval step in an agent turn."
},
"SafetyViolation": {
"type": "object",
@@ -5431,18 +5482,22 @@
"type": "object",
"properties": {
"turn_id": {
- "type": "string"
+ "type": "string",
+ "description": "The ID of the turn."
},
"step_id": {
- "type": "string"
+ "type": "string",
+ "description": "The ID of the step."
},
"started_at": {
"type": "string",
- "format": "date-time"
+ "format": "date-time",
+ "description": "The time the step started."
},
"completed_at": {
"type": "string",
- "format": "date-time"
+ "format": "date-time",
+ "description": "The time the step completed."
},
"step_type": {
"type": "string",
@@ -5450,7 +5505,8 @@
"default": "shield_call"
},
"violation": {
- "$ref": "#/components/schemas/SafetyViolation"
+ "$ref": "#/components/schemas/SafetyViolation",
+ "description": "The violation from the shield call."
}
},
"additionalProperties": false,
@@ -5459,24 +5515,29 @@
"step_id",
"step_type"
],
- "title": "ShieldCallStep"
+ "title": "ShieldCallStep",
+ "description": "A shield call step in an agent turn."
},
"ToolExecutionStep": {
"type": "object",
"properties": {
"turn_id": {
- "type": "string"
+ "type": "string",
+ "description": "The ID of the turn."
},
"step_id": {
- "type": "string"
+ "type": "string",
+ "description": "The ID of the step."
},
"started_at": {
"type": "string",
- "format": "date-time"
+ "format": "date-time",
+ "description": "The time the step started."
},
"completed_at": {
"type": "string",
- "format": "date-time"
+ "format": "date-time",
+ "description": "The time the step completed."
},
"step_type": {
"type": "string",
@@ -5487,13 +5548,15 @@
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolCall"
- }
+ },
+ "description": "The tool calls to execute."
},
"tool_responses": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolResponse"
- }
+ },
+ "description": "The tool responses from the tool calls."
}
},
"additionalProperties": false,
@@ -5504,7 +5567,8 @@
"tool_calls",
"tool_responses"
],
- "title": "ToolExecutionStep"
+ "title": "ToolExecutionStep",
+ "description": "A tool execution step in an agent turn."
},
"ToolResponse": {
"type": "object",
@@ -5641,10 +5705,12 @@
{
"$ref": "#/components/schemas/URL"
}
- ]
+ ],
+ "description": "The content of the attachment."
},
"mime_type": {
- "type": "string"
+ "type": "string",
+ "description": "The MIME type of the attachment."
}
},
"additionalProperties": false,
@@ -5652,7 +5718,8 @@
"content",
"mime_type"
],
- "title": "Attachment"
+ "title": "Attachment",
+ "description": "An attachment to an agent turn."
}
},
"started_at": {
@@ -5747,7 +5814,8 @@
"shield_call",
"memory_retrieval"
],
- "title": "StepType"
+ "title": "StepType",
+ "description": "Type of the step in an agent turn."
},
"step_id": {
"type": "string"
@@ -5803,7 +5871,8 @@
"shield_call",
"memory_retrieval"
],
- "title": "StepType"
+ "title": "StepType",
+ "description": "Type of the step in an agent turn."
},
"step_id": {
"type": "string"
@@ -5837,7 +5906,8 @@
"shield_call",
"memory_retrieval"
],
- "title": "StepType"
+ "title": "StepType",
+ "description": "Type of the step in an agent turn."
},
"step_id": {
"type": "string"
@@ -6129,7 +6199,8 @@
"default": "agent"
},
"config": {
- "$ref": "#/components/schemas/AgentConfig"
+ "$ref": "#/components/schemas/AgentConfig",
+ "description": "The configuration for the agent candidate."
}
},
"additionalProperties": false,
@@ -6137,7 +6208,8 @@
"type",
"config"
],
- "title": "AgentCandidate"
+ "title": "AgentCandidate",
+ "description": "An agent candidate for evaluation."
},
"AggregationFunctionType": {
"type": "string",
@@ -6174,16 +6246,19 @@
"type": "object",
"properties": {
"eval_candidate": {
- "$ref": "#/components/schemas/EvalCandidate"
+ "$ref": "#/components/schemas/EvalCandidate",
+ "description": "The candidate to evaluate."
},
"scoring_params": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringFnParams"
- }
+ },
+ "description": "Map between scoring function id and parameters for each scoring function you want to run"
},
"num_examples": {
- "type": "integer"
+ "type": "integer",
+ "description": "(Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated"
}
},
"additionalProperties": false,
@@ -6191,7 +6266,8 @@
"eval_candidate",
"scoring_params"
],
- "title": "BenchmarkConfig"
+ "title": "BenchmarkConfig",
+ "description": "A benchmark configuration for evaluation."
},
"EvalCandidate": {
"oneOf": [
@@ -6253,13 +6329,16 @@
"default": "model"
},
"model": {
- "type": "string"
+ "type": "string",
+ "description": "The model ID to evaluate."
},
"sampling_params": {
- "$ref": "#/components/schemas/SamplingParams"
+ "$ref": "#/components/schemas/SamplingParams",
+ "description": "The sampling parameters for the model."
},
"system_message": {
- "$ref": "#/components/schemas/SystemMessage"
+ "$ref": "#/components/schemas/SystemMessage",
+ "description": "(Optional) The system message providing instructions or context to the model."
}
},
"additionalProperties": false,
@@ -6268,7 +6347,8 @@
"model",
"sampling_params"
],
- "title": "ModelCandidate"
+ "title": "ModelCandidate",
+ "description": "A model candidate for evaluation."
},
"RegexParserScoringFnParams": {
"type": "object",
@@ -6347,16 +6427,19 @@
}
]
}
- }
+ },
+ "description": "The rows to evaluate."
},
"scoring_functions": {
"type": "array",
"items": {
"type": "string"
- }
+ },
+ "description": "The scoring functions to use for the evaluation."
},
"benchmark_config": {
- "$ref": "#/components/schemas/BenchmarkConfig"
+ "$ref": "#/components/schemas/BenchmarkConfig",
+ "description": "The configuration for the benchmark."
}
},
"additionalProperties": false,
@@ -6396,13 +6479,15 @@
}
]
}
- }
+ },
+ "description": "The generations from the evaluation."
},
"scores": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringResult"
- }
+ },
+ "description": "The scores from the evaluation."
}
},
"additionalProperties": false,
@@ -6410,7 +6495,8 @@
"generations",
"scores"
],
- "title": "EvaluateResponse"
+ "title": "EvaluateResponse",
+ "description": "The response from an evaluation."
},
"ScoringResult": {
"type": "object",
@@ -6441,7 +6527,8 @@
}
]
}
- }
+ },
+ "description": "The scoring result for each row. Each row is a map of column name to value."
},
"aggregated_results": {
"type": "object",
@@ -6466,7 +6553,8 @@
"type": "object"
}
]
- }
+ },
+ "description": "Map of metric name to aggregated value"
}
},
"additionalProperties": false,
@@ -6474,7 +6562,8 @@
"score_rows",
"aggregated_results"
],
- "title": "ScoringResult"
+ "title": "ScoringResult",
+ "description": "A scoring result for a single row."
},
"Session": {
"type": "object",
@@ -6963,13 +7052,16 @@
}
]
}
- }
+ },
+ "description": "The rows in the current page."
},
"total_count": {
- "type": "integer"
+ "type": "integer",
+ "description": "The total number of rows in the dataset."
},
"next_page_token": {
- "type": "string"
+ "type": "string",
+ "description": "The token to get the next page of rows."
}
},
"additionalProperties": false,
@@ -6977,7 +7069,8 @@
"rows",
"total_count"
],
- "title": "PaginatedRowsResult"
+ "title": "PaginatedRowsResult",
+ "description": "A paginated list of rows from a dataset."
},
"ScoringFn": {
"type": "object",
@@ -9249,7 +9342,8 @@
"type": "object",
"properties": {
"benchmark_config": {
- "$ref": "#/components/schemas/BenchmarkConfig"
+ "$ref": "#/components/schemas/BenchmarkConfig",
+ "description": "The configuration for the benchmark."
}
},
"additionalProperties": false,
@@ -9386,7 +9480,8 @@
}
]
}
- }
+ },
+ "description": "The rows to score."
},
"scoring_functions": {
"type": "object",
@@ -9399,7 +9494,8 @@
"type": "null"
}
]
- }
+ },
+ "description": "The scoring functions to use for the scoring."
}
},
"additionalProperties": false,
@@ -9416,14 +9512,16 @@
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringResult"
- }
+ },
+ "description": "A map of scoring function name to ScoringResult."
}
},
"additionalProperties": false,
"required": [
"results"
],
- "title": "ScoreResponse"
+ "title": "ScoreResponse",
+ "description": "The response from scoring."
},
"ScoreBatchRequest": {
"type": "object",
@@ -9838,7 +9936,8 @@
"name": "Datasets"
},
{
- "name": "Eval"
+ "name": "Eval",
+ "x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
},
{
"name": "Files (Coming Soon)"
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index eb31b61fb..bb994b0c5 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -31,25 +31,32 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- DatasetIO
- description: ''
+ description: >-
+ Get a paginated list of rows from a dataset.
parameters:
- name: dataset_id
in: query
+ description: >-
+ The ID of the dataset to get the rows from.
required: true
schema:
type: string
- name: rows_in_page
in: query
+ description: The number of rows to get per page.
required: true
schema:
type: integer
- name: page_token
in: query
+ description: The token to get the next page of rows.
required: false
schema:
type: string
- name: filter_condition
in: query
+ description: >-
+ (Optional) A condition to filter the rows by.
required: false
schema:
type: string
@@ -234,7 +241,8 @@ paths:
post:
responses:
'200':
- description: OK
+ description: >-
+ An AgentCreateResponse with the agent ID.
content:
application/json:
schema:
@@ -251,7 +259,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
- description: ''
+ description: >-
+ Create an agent with the given configuration.
parameters: []
requestBody:
content:
@@ -263,7 +272,7 @@ paths:
post:
responses:
'200':
- description: OK
+ description: An AgentSessionCreateResponse.
content:
application/json:
schema:
@@ -280,10 +289,12 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
- description: ''
+ description: Create a new session for an agent.
parameters:
- name: agent_id
in: path
+ description: >-
+ The ID of the agent to create the session for.
required: true
schema:
type: string
@@ -298,8 +309,8 @@ paths:
responses:
'200':
description: >-
- A single turn in an interaction with an Agentic System. **OR** streamed
- agent turn completion response.
+ If stream=False, returns a Turn object. If stream=True, returns an SSE
+ event stream of AgentTurnResponseStreamChunk
content:
application/json:
schema:
@@ -319,15 +330,19 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
- description: ''
+ description: Create a new turn for an agent.
parameters:
- name: agent_id
in: path
+ description: >-
+ The ID of the agent to create the turn for.
required: true
schema:
type: string
- name: session_id
in: path
+ description: >-
+ The ID of the session to create the turn for.
required: true
schema:
type: string
@@ -411,10 +426,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
- description: ''
+ description: Delete an agent by its ID.
parameters:
- name: agent_id
in: path
+ description: The ID of the agent to delete.
required: true
schema:
type: string
@@ -439,20 +455,25 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
- description: ''
+ description: Retrieve an agent session by its ID.
parameters:
- name: session_id
in: path
+ description: The ID of the session to get.
required: true
schema:
type: string
- name: agent_id
in: path
+ description: >-
+ The ID of the agent to get the session for.
required: true
schema:
type: string
- name: turn_ids
in: query
+ description: >-
+ (Optional) List of turn IDs to filter the session by.
required: false
schema:
type: array
@@ -474,15 +495,18 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
- description: ''
+ description: Delete an agent session by its ID.
parameters:
- name: session_id
in: path
+ description: The ID of the session to delete.
required: true
schema:
type: string
- name: agent_id
in: path
+ description: >-
+ The ID of the agent to delete the session for.
required: true
schema:
type: string
@@ -596,7 +620,8 @@ paths:
post:
responses:
'200':
- description: OK
+ description: >-
+ EvaluateResponse object containing generations and scores
content:
application/json:
schema:
@@ -613,10 +638,12 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Eval
- description: ''
+ description: Evaluate a list of rows on a benchmark.
parameters:
- name: benchmark_id
in: path
+ description: >-
+ The ID of the benchmark to run the evaluation on.
required: true
schema:
type: string
@@ -630,7 +657,7 @@ paths:
get:
responses:
'200':
- description: OK
+ description: An AgentStepResponse.
content:
application/json:
schema:
@@ -647,25 +674,30 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
- description: ''
+ description: Retrieve an agent step by its ID.
parameters:
- name: agent_id
in: path
+ description: The ID of the agent to get the step for.
required: true
schema:
type: string
- name: session_id
in: path
+ description: >-
+ The ID of the session to get the step for.
required: true
schema:
type: string
- name: turn_id
in: path
+ description: The ID of the turn to get the step for.
required: true
schema:
type: string
- name: step_id
in: path
+ description: The ID of the step to get.
required: true
schema:
type: string
@@ -673,7 +705,7 @@ paths:
get:
responses:
'200':
- description: OK
+ description: A Turn.
content:
application/json:
schema:
@@ -690,20 +722,24 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
- description: ''
+ description: Retrieve an agent turn by its ID.
parameters:
- name: agent_id
in: path
+ description: The ID of the agent to get the turn for.
required: true
schema:
type: string
- name: session_id
in: path
+ description: >-
+ The ID of the session to get the turn for.
required: true
schema:
type: string
- name: turn_id
in: path
+ description: The ID of the turn to get.
required: true
schema:
type: string
@@ -1391,7 +1427,7 @@ paths:
get:
responses:
'200':
- description: OK
+ description: The status of the evaluation job.
content:
application/json:
schema:
@@ -1410,15 +1446,18 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Eval
- description: ''
+ description: Get the status of a job.
parameters:
- name: benchmark_id
in: path
+ description: >-
+ The ID of the benchmark to run the evaluation on.
required: true
schema:
type: string
- name: job_id
in: path
+ description: The ID of the job to get the status of.
required: true
schema:
type: string
@@ -1438,15 +1477,18 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Eval
- description: ''
+ description: Cancel a job.
parameters:
- name: benchmark_id
in: path
+ description: >-
+ The ID of the benchmark to run the evaluation on.
required: true
schema:
type: string
- name: job_id
in: path
+ description: The ID of the job to cancel.
required: true
schema:
type: string
@@ -1454,7 +1496,7 @@ paths:
get:
responses:
'200':
- description: OK
+ description: The result of the job.
content:
application/json:
schema:
@@ -1471,15 +1513,18 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Eval
- description: ''
+ description: Get the result of a job.
parameters:
- name: benchmark_id
in: path
+ description: >-
+ The ID of the benchmark to run the evaluation on.
required: true
schema:
type: string
- name: job_id
in: path
+ description: The ID of the job to get the result of.
required: true
schema:
type: string
@@ -2192,7 +2237,8 @@ paths:
post:
responses:
'200':
- description: OK
+ description: >-
+ The job that was created to run the evaluation.
content:
application/json:
schema:
@@ -2209,10 +2255,12 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Eval
- description: ''
+ description: Run an evaluation on a benchmark.
parameters:
- name: benchmark_id
in: path
+ description: >-
+ The ID of the benchmark to run the evaluation on.
required: true
schema:
type: string
@@ -2280,7 +2328,8 @@ paths:
post:
responses:
'200':
- description: OK
+ description: >-
+ ScoreResponse object containing rows and aggregated results
content:
application/json:
schema:
@@ -2297,7 +2346,7 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Scoring
- description: ''
+ description: Score a list of rows.
parameters: []
requestBody:
content:
@@ -3567,6 +3616,7 @@ components:
properties:
agent_config:
$ref: '#/components/schemas/AgentConfig'
+ description: The configuration for the agent.
additionalProperties: false
required:
- agent_config
@@ -3585,6 +3635,7 @@ components:
properties:
session_name:
type: string
+ description: The name of the session to create.
additionalProperties: false
required:
- session_name
@@ -3607,8 +3658,12 @@ components:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
+ description: List of messages to start the turn with.
stream:
type: boolean
+ description: >-
+ (Optional) If True, generate an SSE event stream of the response. Defaults
+ to False.
documents:
type: array
items:
@@ -3622,19 +3677,30 @@ components:
items:
$ref: '#/components/schemas/InterleavedContentItem'
- $ref: '#/components/schemas/URL'
+ description: The content of the document.
mime_type:
type: string
+ description: The MIME type of the document.
additionalProperties: false
required:
- content
- mime_type
title: Document
+ description: A document to be used by an agent.
+ description: >-
+ (Optional) List of documents to create the turn with.
toolgroups:
type: array
items:
$ref: '#/components/schemas/AgentTool'
+ description: >-
+ (Optional) List of toolgroups to create the turn with, will be used in
+ addition to the agent's config toolgroups for the request.
tool_config:
$ref: '#/components/schemas/ToolConfig'
+ description: >-
+ (Optional) The tool configuration to create the turn with, will be used
+ to override the agent's tool_config.
additionalProperties: false
required:
- messages
@@ -3644,20 +3710,25 @@ components:
properties:
turn_id:
type: string
+ description: The ID of the turn.
step_id:
type: string
+ description: The ID of the step.
started_at:
type: string
format: date-time
+ description: The time the step started.
completed_at:
type: string
format: date-time
+ description: The time the step completed.
step_type:
type: string
const: inference
default: inference
model_response:
$ref: '#/components/schemas/CompletionMessage'
+ description: The response from the LLM.
additionalProperties: false
required:
- turn_id
@@ -3665,27 +3736,36 @@ components:
- step_type
- model_response
title: InferenceStep
+ description: An inference step in an agent turn.
MemoryRetrievalStep:
type: object
properties:
turn_id:
type: string
+ description: The ID of the turn.
step_id:
type: string
+ description: The ID of the step.
started_at:
type: string
format: date-time
+ description: The time the step started.
completed_at:
type: string
format: date-time
+ description: The time the step completed.
step_type:
type: string
const: memory_retrieval
default: memory_retrieval
vector_db_ids:
type: string
+ description: >-
+ The IDs of the vector databases to retrieve context from.
inserted_context:
$ref: '#/components/schemas/InterleavedContent'
+ description: >-
+ The context retrieved from the vector databases.
additionalProperties: false
required:
- turn_id
@@ -3694,6 +3774,8 @@ components:
- vector_db_ids
- inserted_context
title: MemoryRetrievalStep
+ description: >-
+ A memory retrieval step in an agent turn.
SafetyViolation:
type: object
properties:
@@ -3721,39 +3803,49 @@ components:
properties:
turn_id:
type: string
+ description: The ID of the turn.
step_id:
type: string
+ description: The ID of the step.
started_at:
type: string
format: date-time
+ description: The time the step started.
completed_at:
type: string
format: date-time
+ description: The time the step completed.
step_type:
type: string
const: shield_call
default: shield_call
violation:
$ref: '#/components/schemas/SafetyViolation'
+ description: The violation from the shield call.
additionalProperties: false
required:
- turn_id
- step_id
- step_type
title: ShieldCallStep
+ description: A shield call step in an agent turn.
ToolExecutionStep:
type: object
properties:
turn_id:
type: string
+ description: The ID of the turn.
step_id:
type: string
+ description: The ID of the step.
started_at:
type: string
format: date-time
+ description: The time the step started.
completed_at:
type: string
format: date-time
+ description: The time the step completed.
step_type:
type: string
const: tool_execution
@@ -3762,10 +3854,12 @@ components:
type: array
items:
$ref: '#/components/schemas/ToolCall'
+ description: The tool calls to execute.
tool_responses:
type: array
items:
$ref: '#/components/schemas/ToolResponse'
+ description: The tool responses from the tool calls.
additionalProperties: false
required:
- turn_id
@@ -3774,6 +3868,7 @@ components:
- tool_calls
- tool_responses
title: ToolExecutionStep
+ description: A tool execution step in an agent turn.
ToolResponse:
type: object
properties:
@@ -3850,13 +3945,16 @@ components:
items:
$ref: '#/components/schemas/InterleavedContentItem'
- $ref: '#/components/schemas/URL'
+ description: The content of the attachment.
mime_type:
type: string
+ description: The MIME type of the attachment.
additionalProperties: false
required:
- content
- mime_type
title: Attachment
+ description: An attachment to an agent turn.
started_at:
type: string
format: date-time
@@ -3922,6 +4020,7 @@ components:
- shield_call
- memory_retrieval
title: StepType
+ description: Type of the step in an agent turn.
step_id:
type: string
step_details:
@@ -3959,6 +4058,7 @@ components:
- shield_call
- memory_retrieval
title: StepType
+ description: Type of the step in an agent turn.
step_id:
type: string
delta:
@@ -3985,6 +4085,7 @@ components:
- shield_call
- memory_retrieval
title: StepType
+ description: Type of the step in an agent turn.
step_id:
type: string
metadata:
@@ -4212,11 +4313,14 @@ components:
default: agent
config:
$ref: '#/components/schemas/AgentConfig'
+ description: >-
+ The configuration for the agent candidate.
additionalProperties: false
required:
- type
- config
title: AgentCandidate
+ description: An agent candidate for evaluation.
AggregationFunctionType:
type: string
enum:
@@ -4245,17 +4349,26 @@ components:
properties:
eval_candidate:
$ref: '#/components/schemas/EvalCandidate'
+ description: The candidate to evaluate.
scoring_params:
type: object
additionalProperties:
$ref: '#/components/schemas/ScoringFnParams'
+ description: >-
+ Map between scoring function id and parameters for each scoring function
+ you want to run
num_examples:
type: integer
+ description: >-
+ (Optional) The number of examples to evaluate. If not provided, all examples
+ in the dataset will be evaluated
additionalProperties: false
required:
- eval_candidate
- scoring_params
title: BenchmarkConfig
+ description: >-
+ A benchmark configuration for evaluation.
EvalCandidate:
oneOf:
- $ref: '#/components/schemas/ModelCandidate'
@@ -4298,16 +4411,22 @@ components:
default: model
model:
type: string
+ description: The model ID to evaluate.
sampling_params:
$ref: '#/components/schemas/SamplingParams'
+ description: The sampling parameters for the model.
system_message:
$ref: '#/components/schemas/SystemMessage'
+ description: >-
+ (Optional) The system message providing instructions or context to the
+ model.
additionalProperties: false
required:
- type
- model
- sampling_params
title: ModelCandidate
+ description: A model candidate for evaluation.
RegexParserScoringFnParams:
type: object
properties:
@@ -4353,12 +4472,16 @@ components:
- type: string
- type: array
- type: object
+ description: The rows to evaluate.
scoring_functions:
type: array
items:
type: string
+ description: >-
+ The scoring functions to use for the evaluation.
benchmark_config:
$ref: '#/components/schemas/BenchmarkConfig'
+ description: The configuration for the benchmark.
additionalProperties: false
required:
- input_rows
@@ -4380,15 +4503,18 @@ components:
- type: string
- type: array
- type: object
+ description: The generations from the evaluation.
scores:
type: object
additionalProperties:
$ref: '#/components/schemas/ScoringResult'
+ description: The scores from the evaluation.
additionalProperties: false
required:
- generations
- scores
title: EvaluateResponse
+ description: The response from an evaluation.
ScoringResult:
type: object
properties:
@@ -4404,6 +4530,8 @@ components:
- type: string
- type: array
- type: object
+ description: >-
+ The scoring result for each row. Each row is a map of column name to value.
aggregated_results:
type: object
additionalProperties:
@@ -4414,11 +4542,13 @@ components:
- type: string
- type: array
- type: object
+ description: Map of metric name to aggregated value
additionalProperties: false
required:
- score_rows
- aggregated_results
title: ScoringResult
+ description: A scoring result for a single row.
Session:
type: object
properties:
@@ -4731,15 +4861,19 @@ components:
- type: string
- type: array
- type: object
+ description: The rows in the current page.
total_count:
type: integer
+ description: The total number of rows in the dataset.
next_page_token:
type: string
+ description: The token to get the next page of rows.
additionalProperties: false
required:
- rows
- total_count
title: PaginatedRowsResult
+ description: A paginated list of rows from a dataset.
ScoringFn:
type: object
properties:
@@ -6170,6 +6304,7 @@ components:
properties:
benchmark_config:
$ref: '#/components/schemas/BenchmarkConfig'
+ description: The configuration for the benchmark.
additionalProperties: false
required:
- benchmark_config
@@ -6251,12 +6386,15 @@ components:
- type: string
- type: array
- type: object
+ description: The rows to score.
scoring_functions:
type: object
additionalProperties:
oneOf:
- $ref: '#/components/schemas/ScoringFnParams'
- type: 'null'
+ description: >-
+ The scoring functions to use for the scoring.
additionalProperties: false
required:
- input_rows
@@ -6269,10 +6407,13 @@ components:
type: object
additionalProperties:
$ref: '#/components/schemas/ScoringResult'
+ description: >-
+ A map of scoring function name to ScoringResult.
additionalProperties: false
required:
- results
title: ScoreResponse
+ description: The response from scoring.
ScoreBatchRequest:
type: object
properties:
@@ -6543,6 +6684,8 @@ tags:
- name: DatasetIO
- name: Datasets
- name: Eval
+ x-displayName: >-
+ Llama Stack Evaluation API for running evaluations on model and agent candidates.
- name: Files (Coming Soon)
- name: Inference
description: >-
diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index eb3399788..def61b617 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -41,16 +41,36 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
class Attachment(BaseModel):
+ """An attachment to an agent turn.
+
+ :param content: The content of the attachment.
+ :param mime_type: The MIME type of the attachment.
+ """
+
content: InterleavedContent | URL
mime_type: str
class Document(BaseModel):
+ """A document to be used by an agent.
+
+ :param content: The content of the document.
+ :param mime_type: The MIME type of the document.
+ """
+
content: InterleavedContent | URL
mime_type: str
class StepCommon(BaseModel):
+ """A common step in an agent turn.
+
+ :param turn_id: The ID of the turn.
+ :param step_id: The ID of the step.
+ :param started_at: The time the step started.
+ :param completed_at: The time the step completed.
+ """
+
turn_id: str
step_id: str
started_at: Optional[datetime] = None
@@ -58,6 +78,14 @@ class StepCommon(BaseModel):
class StepType(Enum):
+ """Type of the step in an agent turn.
+
+ :cvar inference: The step is an inference step that calls an LLM.
+ :cvar tool_execution: The step is a tool execution step that executes a tool call.
+ :cvar shield_call: The step is a shield call step that checks for safety violations.
+ :cvar memory_retrieval: The step is a memory retrieval step that retrieves context from vector dbs.
+ """
+
inference = "inference"
tool_execution = "tool_execution"
shield_call = "shield_call"
@@ -66,6 +94,11 @@ class StepType(Enum):
@json_schema_type
class InferenceStep(StepCommon):
+ """An inference step in an agent turn.
+
+ :param model_response: The response from the LLM.
+ """
+
model_config = ConfigDict(protected_namespaces=())
step_type: Literal[StepType.inference.value] = StepType.inference.value
@@ -74,6 +107,12 @@ class InferenceStep(StepCommon):
@json_schema_type
class ToolExecutionStep(StepCommon):
+ """A tool execution step in an agent turn.
+
+ :param tool_calls: The tool calls to execute.
+ :param tool_responses: The tool responses from the tool calls.
+ """
+
step_type: Literal[StepType.tool_execution.value] = StepType.tool_execution.value
tool_calls: List[ToolCall]
tool_responses: List[ToolResponse]
@@ -81,13 +120,25 @@ class ToolExecutionStep(StepCommon):
@json_schema_type
class ShieldCallStep(StepCommon):
+ """A shield call step in an agent turn.
+
+ :param violation: The violation from the shield call.
+ """
+
step_type: Literal[StepType.shield_call.value] = StepType.shield_call.value
violation: Optional[SafetyViolation]
@json_schema_type
class MemoryRetrievalStep(StepCommon):
+ """A memory retrieval step in an agent turn.
+
+ :param vector_db_ids: The IDs of the vector databases to retrieve context from.
+ :param inserted_context: The context retrieved from the vector databases.
+ """
+
step_type: Literal[StepType.memory_retrieval.value] = StepType.memory_retrieval.value
+ # TODO: should this be List[str]?
vector_db_ids: str
inserted_context: InterleavedContent
@@ -335,7 +386,13 @@ class Agents(Protocol):
async def create_agent(
self,
agent_config: AgentConfig,
- ) -> AgentCreateResponse: ...
+ ) -> AgentCreateResponse:
+ """Create an agent with the given configuration.
+
+ :param agent_config: The configuration for the agent.
+ :returns: An AgentCreateResponse with the agent ID.
+ """
+ ...
@webmethod(route="/agents/{agent_id}/session/{session_id}/turn", method="POST")
async def create_agent_turn(
@@ -352,7 +409,19 @@ class Agents(Protocol):
documents: Optional[List[Document]] = None,
toolgroups: Optional[List[AgentToolGroup]] = None,
tool_config: Optional[ToolConfig] = None,
- ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...
+ ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]:
+ """Create a new turn for an agent.
+
+ :param agent_id: The ID of the agent to create the turn for.
+ :param session_id: The ID of the session to create the turn for.
+ :param messages: List of messages to start the turn with.
+ :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
+ :param documents: (Optional) List of documents to create the turn with.
+ :param toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request.
+ :param tool_config: (Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config.
+ :returns: If stream=False, returns a Turn object.
+ If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk
+ """
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
@@ -388,7 +457,15 @@ class Agents(Protocol):
agent_id: str,
session_id: str,
turn_id: str,
- ) -> Turn: ...
+ ) -> Turn:
+ """Retrieve an agent turn by its ID.
+
+ :param agent_id: The ID of the agent to get the turn for.
+ :param session_id: The ID of the session to get the turn for.
+ :param turn_id: The ID of the turn to get.
+ :returns: A Turn.
+ """
+ ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
@@ -400,14 +477,30 @@ class Agents(Protocol):
session_id: str,
turn_id: str,
step_id: str,
- ) -> AgentStepResponse: ...
+ ) -> AgentStepResponse:
+ """Retrieve an agent step by its ID.
+
+ :param agent_id: The ID of the agent to get the step for.
+ :param session_id: The ID of the session to get the step for.
+ :param turn_id: The ID of the turn to get the step for.
+ :param step_id: The ID of the step to get.
+ :returns: An AgentStepResponse.
+ """
+ ...
@webmethod(route="/agents/{agent_id}/session", method="POST")
async def create_agent_session(
self,
agent_id: str,
session_name: str,
- ) -> AgentSessionCreateResponse: ...
+ ) -> AgentSessionCreateResponse:
+ """Create a new session for an agent.
+
+ :param agent_id: The ID of the agent to create the session for.
+ :param session_name: The name of the session to create.
+ :returns: An AgentSessionCreateResponse.
+ """
+ ...
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET")
async def get_agents_session(
@@ -415,17 +508,35 @@ class Agents(Protocol):
session_id: str,
agent_id: str,
turn_ids: Optional[List[str]] = None,
- ) -> Session: ...
+ ) -> Session:
+ """Retrieve an agent session by its ID.
+
+ :param session_id: The ID of the session to get.
+ :param agent_id: The ID of the agent to get the session for.
+ :param turn_ids: (Optional) List of turn IDs to filter the session by.
+ """
+ ...
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE")
async def delete_agents_session(
self,
session_id: str,
agent_id: str,
- ) -> None: ...
+ ) -> None:
+ """Delete an agent session by its ID.
+
+ :param session_id: The ID of the session to delete.
+ :param agent_id: The ID of the agent to delete the session for.
+ """
+ ...
@webmethod(route="/agents/{agent_id}", method="DELETE")
async def delete_agent(
self,
agent_id: str,
- ) -> None: ...
+ ) -> None:
+ """Delete an agent by its ID.
+
+ :param agent_id: The ID of the agent to delete.
+ """
+ ...
diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py
index d85d22876..6a04a6329 100644
--- a/llama_stack/apis/datasetio/datasetio.py
+++ b/llama_stack/apis/datasetio/datasetio.py
@@ -14,6 +14,14 @@ from llama_stack.schema_utils import json_schema_type, webmethod
@json_schema_type
class PaginatedRowsResult(BaseModel):
+ """
+ A paginated list of rows from a dataset.
+
+ :param rows: The rows in the current page.
+ :param total_count: The total number of rows in the dataset.
+ :param next_page_token: The token to get the next page of rows.
+ """
+
# the rows obey the DatasetSchema for the given dataset
rows: List[Dict[str, Any]]
total_count: int
@@ -36,7 +44,15 @@ class DatasetIO(Protocol):
rows_in_page: int,
page_token: Optional[str] = None,
filter_condition: Optional[str] = None,
- ) -> PaginatedRowsResult: ...
+ ) -> PaginatedRowsResult:
+ """Get a paginated list of rows from a dataset.
+
+ :param dataset_id: The ID of the dataset to get the rows from.
+ :param rows_in_page: The number of rows to get per page.
+ :param page_token: The token to get the next page of rows.
+ :param filter_condition: (Optional) A condition to filter the rows by.
+ """
+ ...
@webmethod(route="/datasetio/rows", method="POST")
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index 40a3b750a..dec018d83 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -19,6 +19,13 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
@json_schema_type
class ModelCandidate(BaseModel):
+ """A model candidate for evaluation.
+
+ :param model: The model ID to evaluate.
+ :param sampling_params: The sampling parameters for the model.
+ :param system_message: (Optional) The system message providing instructions or context to the model.
+ """
+
type: Literal["model"] = "model"
model: str
sampling_params: SamplingParams
@@ -27,6 +34,11 @@ class ModelCandidate(BaseModel):
@json_schema_type
class AgentCandidate(BaseModel):
+ """An agent candidate for evaluation.
+
+ :param config: The configuration for the agent candidate.
+ """
+
type: Literal["agent"] = "agent"
config: AgentConfig
@@ -39,6 +51,13 @@ EvalCandidate = register_schema(
@json_schema_type
class BenchmarkConfig(BaseModel):
+ """A benchmark configuration for evaluation.
+
+ :param eval_candidate: The candidate to evaluate.
+ :param scoring_params: Map between scoring function id and parameters for each scoring function you want to run
+ :param num_examples: (Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated
+ """
+
eval_candidate: EvalCandidate
scoring_params: Dict[str, ScoringFnParams] = Field(
description="Map between scoring function id and parameters for each scoring function you want to run",
@@ -53,18 +72,32 @@ class BenchmarkConfig(BaseModel):
@json_schema_type
class EvaluateResponse(BaseModel):
+ """The response from an evaluation.
+
+ :param generations: The generations from the evaluation.
+ :param scores: The scores from the evaluation.
+ """
+
generations: List[Dict[str, Any]]
# each key in the dict is a scoring function name
scores: Dict[str, ScoringResult]
class Eval(Protocol):
+ """Llama Stack Evaluation API for running evaluations on model and agent candidates."""
+
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
async def run_eval(
self,
benchmark_id: str,
benchmark_config: BenchmarkConfig,
- ) -> Job: ...
+ ) -> Job:
+ """Run an evaluation on a benchmark.
+
+ :param benchmark_id: The ID of the benchmark to run the evaluation on.
+ :param benchmark_config: The configuration for the benchmark.
+ :return: The job that was created to run the evaluation.
+ """
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
async def evaluate_rows(
@@ -73,13 +106,40 @@ class Eval(Protocol):
input_rows: List[Dict[str, Any]],
scoring_functions: List[str],
benchmark_config: BenchmarkConfig,
- ) -> EvaluateResponse: ...
+ ) -> EvaluateResponse:
+ """Evaluate a list of rows on a benchmark.
+
+ :param benchmark_id: The ID of the benchmark to run the evaluation on.
+ :param input_rows: The rows to evaluate.
+ :param scoring_functions: The scoring functions to use for the evaluation.
+ :param benchmark_config: The configuration for the benchmark.
+ :return: EvaluateResponse object containing generations and scores
+ """
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
- async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]: ...
+ async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]:
+ """Get the status of a job.
+
+ :param benchmark_id: The ID of the benchmark to run the evaluation on.
+ :param job_id: The ID of the job to get the status of.
+ :return: The status of the evaluation job.
+ """
+ ...
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
- async def job_cancel(self, benchmark_id: str, job_id: str) -> None: ...
+ async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
+ """Cancel a job.
+
+ :param benchmark_id: The ID of the benchmark to run the evaluation on.
+ :param job_id: The ID of the job to cancel.
+ """
+ ...
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
- async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: ...
+ async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
+ """Get the result of a job.
+
+ :param benchmark_id: The ID of the benchmark to run the evaluation on.
+ :param job_id: The ID of the job to get the result of.
+ :return: The result of the job.
+ """
diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py
index 960149476..54a9ac2aa 100644
--- a/llama_stack/apis/scoring/scoring.py
+++ b/llama_stack/apis/scoring/scoring.py
@@ -17,6 +17,13 @@ ScoringResultRow = Dict[str, Any]
@json_schema_type
class ScoringResult(BaseModel):
+ """
+ A scoring result for a single row.
+
+ :param score_rows: The scoring result for each row. Each row is a map of column name to value.
+ :param aggregated_results: Map of metric name to aggregated value
+ """
+
score_rows: List[ScoringResultRow]
# aggregated metrics to value
aggregated_results: Dict[str, Any]
@@ -30,6 +37,12 @@ class ScoreBatchResponse(BaseModel):
@json_schema_type
class ScoreResponse(BaseModel):
+ """
+ The response from scoring.
+
+ :param results: A map of scoring function name to ScoringResult.
+ """
+
# each key in the dict is a scoring function name
results: Dict[str, ScoringResult]
@@ -55,4 +68,11 @@ class Scoring(Protocol):
self,
input_rows: List[Dict[str, Any]],
scoring_functions: Dict[str, Optional[ScoringFnParams]],
- ) -> ScoreResponse: ...
+ ) -> ScoreResponse:
+ """Score a list of rows.
+
+ :param input_rows: The rows to score.
+ :param scoring_functions: The scoring functions to use for the scoring.
+ :return: ScoreResponse object containing rows and aggregated results
+ """
+ ...