diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 02d05776d..6d199e29d 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -40,286 +40,6 @@
}
],
"paths": {
- "/v1/eval/tasks/{task_id}/evaluations": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluateResponse"
- }
- }
- }
- }
- },
- "tags": [
- "Eval"
- ],
- "description": "",
- "parameters": [
- {
- "name": "task_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/DeprecatedEvaluateRowsRequest"
- }
- }
- },
- "required": true
- },
- "deprecated": true
- }
- },
- "/v1/eval-tasks/{eval_task_id}": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/Benchmark"
- },
- {
- "type": "null"
- }
- ]
- }
- }
- }
- }
- },
- "tags": [
- "Benchmarks"
- ],
- "description": "",
- "parameters": [
- {
- "name": "eval_task_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
- "deprecated": true
- }
- },
- "/v1/eval/tasks/{task_id}/jobs/{job_id}": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/JobStatus"
- },
- {
- "type": "null"
- }
- ]
- }
- }
- }
- }
- },
- "tags": [
- "Eval"
- ],
- "description": "",
- "parameters": [
- {
- "name": "task_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "job_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
- "deprecated": true
- },
- "delete": {
- "responses": {
- "200": {
- "description": "OK"
- }
- },
- "tags": [
- "Eval"
- ],
- "description": "",
- "parameters": [
- {
- "name": "task_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "job_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
- "deprecated": true
- }
- },
- "/v1/eval/tasks/{task_id}/jobs/{job_id}/result": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluateResponse"
- }
- }
- }
- }
- },
- "tags": [
- "Eval"
- ],
- "description": "",
- "parameters": [
- {
- "name": "task_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "job_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
- "deprecated": true
- }
- },
- "/v1/eval-tasks": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/ListBenchmarksResponse"
- }
- }
- }
- }
- },
- "tags": [
- "Benchmarks"
- ],
- "description": "",
- "parameters": [],
- "deprecated": true
- },
- "post": {
- "responses": {
- "200": {
- "description": "OK"
- }
- },
- "tags": [
- "Benchmarks"
- ],
- "description": "",
- "parameters": [],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/DeprecatedRegisterEvalTaskRequest"
- }
- }
- },
- "required": true
- },
- "deprecated": true
- }
- },
- "/v1/eval/tasks/{task_id}/jobs": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/Job"
- }
- }
- }
- }
- },
- "tags": [
- "Eval"
- ],
- "description": "",
- "parameters": [
- {
- "name": "task_id",
- "in": "path",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/DeprecatedRunEvalRequest"
- }
- }
- },
- "required": true
- },
- "deprecated": true
- }
- },
"/v1/datasetio/rows": {
"get": {
"responses": {
@@ -2898,227 +2618,86 @@
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
"components": {
"schemas": {
- "AgentCandidate": {
+ "AppendRowsRequest": {
"type": "object",
"properties": {
- "type": {
- "type": "string",
- "const": "agent",
- "default": "agent"
- },
- "config": {
- "$ref": "#/components/schemas/AgentConfig"
- }
- },
- "additionalProperties": false,
- "required": [
- "type",
- "config"
- ],
- "title": "AgentCandidate"
- },
- "AgentConfig": {
- "type": "object",
- "properties": {
- "sampling_params": {
- "$ref": "#/components/schemas/SamplingParams"
- },
- "input_shields": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "output_shields": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "toolgroups": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/AgentTool"
- }
- },
- "client_tools": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/ToolDef"
- }
- },
- "tool_choice": {
- "type": "string",
- "enum": [
- "auto",
- "required",
- "none"
- ],
- "title": "ToolChoice",
- "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.",
- "deprecated": true
- },
- "tool_prompt_format": {
- "type": "string",
- "enum": [
- "json",
- "function_tag",
- "python_list"
- ],
- "title": "ToolPromptFormat",
- "description": "Prompt format for calling custom / zero shot tools.",
- "deprecated": true
- },
- "tool_config": {
- "$ref": "#/components/schemas/ToolConfig"
- },
- "max_infer_iters": {
- "type": "integer",
- "default": 10
- },
- "model": {
+ "dataset_id": {
"type": "string"
},
- "instructions": {
- "type": "string"
- },
- "enable_session_persistence": {
- "type": "boolean",
- "default": false
- },
- "response_format": {
- "$ref": "#/components/schemas/ResponseFormat"
- }
- },
- "additionalProperties": false,
- "required": [
- "model",
- "instructions"
- ],
- "title": "AgentConfig"
- },
- "AgentTool": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "type": "object",
- "properties": {
- "name": {
- "type": "string"
- },
- "args": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
+ "rows": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
}
- },
- "additionalProperties": false,
- "required": [
- "name",
- "args"
- ],
- "title": "AgentToolGroupWithArgs"
+ }
}
- ]
- },
- "AggregationFunctionType": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
+ },
+ "additionalProperties": false,
+ "required": [
+ "dataset_id",
+ "rows"
],
- "title": "AggregationFunctionType"
+ "title": "AppendRowsRequest"
},
- "BasicScoringFnParams": {
+ "CompletionMessage": {
"type": "object",
"properties": {
- "type": {
+ "role": {
"type": "string",
- "const": "basic",
- "default": "basic"
+ "const": "assistant",
+ "default": "assistant",
+ "description": "Must be \"assistant\" to identify this as the model's response"
},
- "aggregation_functions": {
+ "content": {
+ "$ref": "#/components/schemas/InterleavedContent",
+ "description": "The content of the model's response"
+ },
+ "stop_reason": {
+ "type": "string",
+ "enum": [
+ "end_of_turn",
+ "end_of_message",
+ "out_of_tokens"
+ ],
+ "description": "Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`: The model finished generating the entire response. - `StopReason.end_of_message`: The model finished generating but generated a partial response -- usually, a tool call. The user may call the tool and continue the conversation with the tool's response. - `StopReason.out_of_tokens`: The model ran out of token budget."
+ },
+ "tool_calls": {
"type": "array",
"items": {
- "$ref": "#/components/schemas/AggregationFunctionType"
- }
+ "$ref": "#/components/schemas/ToolCall"
+ },
+ "description": "List of tool calls. Each tool call is a ToolCall object."
}
},
"additionalProperties": false,
"required": [
- "type"
+ "role",
+ "content",
+ "stop_reason"
],
- "title": "BasicScoringFnParams"
- },
- "BenchmarkConfig": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "benchmark",
- "default": "benchmark"
- },
- "eval_candidate": {
- "$ref": "#/components/schemas/EvalCandidate"
- },
- "scoring_params": {
- "type": "object",
- "additionalProperties": {
- "$ref": "#/components/schemas/ScoringFnParams"
- }
- },
- "num_examples": {
- "type": "integer"
- }
- },
- "additionalProperties": false,
- "required": [
- "type",
- "eval_candidate",
- "scoring_params"
- ],
- "title": "BenchmarkConfig"
- },
- "EvalCandidate": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/ModelCandidate"
- },
- {
- "$ref": "#/components/schemas/AgentCandidate"
- }
- ],
- "discriminator": {
- "propertyName": "type",
- "mapping": {
- "model": "#/components/schemas/ModelCandidate",
- "agent": "#/components/schemas/AgentCandidate"
- }
- }
+ "title": "CompletionMessage",
+ "description": "A message containing the model's (assistant) response in a chat conversation."
},
"GrammarResponseFormat": {
"type": "object",
@@ -3290,92 +2869,30 @@
"title": "JsonSchemaResponseFormat",
"description": "Configuration for JSON schema-guided response generation."
},
- "LLMAsJudgeScoringFnParams": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "llm_as_judge",
- "default": "llm_as_judge"
+ "Message": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/UserMessage"
},
- "judge_model": {
- "type": "string"
- },
- "prompt_template": {
- "type": "string"
- },
- "judge_score_regexes": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "aggregation_functions": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/AggregationFunctionType"
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "type",
- "judge_model"
- ],
- "title": "LLMAsJudgeScoringFnParams"
- },
- "ModelCandidate": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "model",
- "default": "model"
- },
- "model": {
- "type": "string"
- },
- "sampling_params": {
- "$ref": "#/components/schemas/SamplingParams"
- },
- "system_message": {
+ {
"$ref": "#/components/schemas/SystemMessage"
- }
- },
- "additionalProperties": false,
- "required": [
- "type",
- "model",
- "sampling_params"
- ],
- "title": "ModelCandidate"
- },
- "RegexParserScoringFnParams": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "regex_parser",
- "default": "regex_parser"
},
- "parsing_regexes": {
- "type": "array",
- "items": {
- "type": "string"
- }
+ {
+ "$ref": "#/components/schemas/ToolResponseMessage"
},
- "aggregation_functions": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/AggregationFunctionType"
- }
+ {
+ "$ref": "#/components/schemas/CompletionMessage"
}
- },
- "additionalProperties": false,
- "required": [
- "type"
],
- "title": "RegexParserScoringFnParams"
+ "discriminator": {
+ "propertyName": "role",
+ "mapping": {
+ "user": "#/components/schemas/UserMessage",
+ "system": "#/components/schemas/SystemMessage",
+ "tool": "#/components/schemas/ToolResponseMessage",
+ "assistant": "#/components/schemas/CompletionMessage"
+ }
+ }
},
"ResponseFormat": {
"oneOf": [
@@ -3436,27 +2953,6 @@
}
}
},
- "ScoringFnParams": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
- },
- {
- "$ref": "#/components/schemas/RegexParserScoringFnParams"
- },
- {
- "$ref": "#/components/schemas/BasicScoringFnParams"
- }
- ],
- "discriminator": {
- "propertyName": "type",
- "mapping": {
- "llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams",
- "regex_parser": "#/components/schemas/RegexParserScoringFnParams",
- "basic": "#/components/schemas/BasicScoringFnParams"
- }
- }
- },
"SystemMessage": {
"type": "object",
"properties": {
@@ -3501,635 +2997,6 @@
"title": "TextContentItem",
"description": "A text content item"
},
- "ToolConfig": {
- "type": "object",
- "properties": {
- "tool_choice": {
- "oneOf": [
- {
- "type": "string",
- "enum": [
- "auto",
- "required",
- "none"
- ],
- "title": "ToolChoice",
- "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model."
- },
- {
- "type": "string"
- }
- ],
- "default": "auto",
- "description": "(Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto."
- },
- "tool_prompt_format": {
- "type": "string",
- "enum": [
- "json",
- "function_tag",
- "python_list"
- ],
- "description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls."
- },
- "system_message_behavior": {
- "type": "string",
- "enum": [
- "append",
- "replace"
- ],
- "description": "(Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string '{{function_definitions}}' to indicate where the function definitions should be inserted.",
- "default": "append"
- }
- },
- "additionalProperties": false,
- "title": "ToolConfig",
- "description": "Configuration for tool use."
- },
- "ToolDef": {
- "type": "object",
- "properties": {
- "name": {
- "type": "string"
- },
- "description": {
- "type": "string"
- },
- "parameters": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/ToolParameter"
- }
- },
- "metadata": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "name"
- ],
- "title": "ToolDef"
- },
- "ToolParameter": {
- "type": "object",
- "properties": {
- "name": {
- "type": "string"
- },
- "parameter_type": {
- "type": "string"
- },
- "description": {
- "type": "string"
- },
- "required": {
- "type": "boolean",
- "default": true
- },
- "default": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- },
- "additionalProperties": false,
- "required": [
- "name",
- "parameter_type",
- "description",
- "required"
- ],
- "title": "ToolParameter"
- },
- "TopKSamplingStrategy": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "top_k",
- "default": "top_k"
- },
- "top_k": {
- "type": "integer"
- }
- },
- "additionalProperties": false,
- "required": [
- "type",
- "top_k"
- ],
- "title": "TopKSamplingStrategy"
- },
- "TopPSamplingStrategy": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "top_p",
- "default": "top_p"
- },
- "temperature": {
- "type": "number"
- },
- "top_p": {
- "type": "number",
- "default": 0.95
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ],
- "title": "TopPSamplingStrategy"
- },
- "URL": {
- "type": "object",
- "properties": {
- "uri": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "uri"
- ],
- "title": "URL"
- },
- "DeprecatedEvaluateRowsRequest": {
- "type": "object",
- "properties": {
- "input_rows": {
- "type": "array",
- "items": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "scoring_functions": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "task_config": {
- "$ref": "#/components/schemas/BenchmarkConfig"
- }
- },
- "additionalProperties": false,
- "required": [
- "input_rows",
- "scoring_functions",
- "task_config"
- ],
- "title": "DeprecatedEvaluateRowsRequest"
- },
- "EvaluateResponse": {
- "type": "object",
- "properties": {
- "generations": {
- "type": "array",
- "items": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "scores": {
- "type": "object",
- "additionalProperties": {
- "$ref": "#/components/schemas/ScoringResult"
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "generations",
- "scores"
- ],
- "title": "EvaluateResponse"
- },
- "ScoringResult": {
- "type": "object",
- "properties": {
- "score_rows": {
- "type": "array",
- "items": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "aggregated_results": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "score_rows",
- "aggregated_results"
- ],
- "title": "ScoringResult"
- },
- "Benchmark": {
- "type": "object",
- "properties": {
- "identifier": {
- "type": "string"
- },
- "provider_resource_id": {
- "type": "string"
- },
- "provider_id": {
- "type": "string"
- },
- "type": {
- "type": "string",
- "const": "benchmark",
- "default": "benchmark"
- },
- "dataset_id": {
- "type": "string"
- },
- "scoring_functions": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "metadata": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "identifier",
- "provider_resource_id",
- "provider_id",
- "type",
- "dataset_id",
- "scoring_functions",
- "metadata"
- ],
- "title": "Benchmark"
- },
- "JobStatus": {
- "type": "string",
- "enum": [
- "completed",
- "in_progress",
- "failed",
- "scheduled"
- ],
- "title": "JobStatus"
- },
- "ListBenchmarksResponse": {
- "type": "object",
- "properties": {
- "data": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/Benchmark"
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "data"
- ],
- "title": "ListBenchmarksResponse"
- },
- "DeprecatedRegisterEvalTaskRequest": {
- "type": "object",
- "properties": {
- "eval_task_id": {
- "type": "string"
- },
- "dataset_id": {
- "type": "string"
- },
- "scoring_functions": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "provider_benchmark_id": {
- "type": "string"
- },
- "provider_id": {
- "type": "string"
- },
- "metadata": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "eval_task_id",
- "dataset_id",
- "scoring_functions"
- ],
- "title": "DeprecatedRegisterEvalTaskRequest"
- },
- "DeprecatedRunEvalRequest": {
- "type": "object",
- "properties": {
- "task_config": {
- "$ref": "#/components/schemas/BenchmarkConfig"
- }
- },
- "additionalProperties": false,
- "required": [
- "task_config"
- ],
- "title": "DeprecatedRunEvalRequest"
- },
- "Job": {
- "type": "object",
- "properties": {
- "job_id": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "job_id"
- ],
- "title": "Job"
- },
- "AppendRowsRequest": {
- "type": "object",
- "properties": {
- "dataset_id": {
- "type": "string"
- },
- "rows": {
- "type": "array",
- "items": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "dataset_id",
- "rows"
- ],
- "title": "AppendRowsRequest"
- },
- "CompletionMessage": {
- "type": "object",
- "properties": {
- "role": {
- "type": "string",
- "const": "assistant",
- "default": "assistant",
- "description": "Must be \"assistant\" to identify this as the model's response"
- },
- "content": {
- "$ref": "#/components/schemas/InterleavedContent",
- "description": "The content of the model's response"
- },
- "stop_reason": {
- "type": "string",
- "enum": [
- "end_of_turn",
- "end_of_message",
- "out_of_tokens"
- ],
- "description": "Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`: The model finished generating the entire response. - `StopReason.end_of_message`: The model finished generating but generated a partial response -- usually, a tool call. The user may call the tool and continue the conversation with the tool's response. - `StopReason.out_of_tokens`: The model ran out of token budget."
- },
- "tool_calls": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/ToolCall"
- },
- "description": "List of tool calls. Each tool call is a ToolCall object."
- }
- },
- "additionalProperties": false,
- "required": [
- "role",
- "content",
- "stop_reason"
- ],
- "title": "CompletionMessage",
- "description": "A message containing the model's (assistant) response in a chat conversation."
- },
- "Message": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/UserMessage"
- },
- {
- "$ref": "#/components/schemas/SystemMessage"
- },
- {
- "$ref": "#/components/schemas/ToolResponseMessage"
- },
- {
- "$ref": "#/components/schemas/CompletionMessage"
- }
- ],
- "discriminator": {
- "propertyName": "role",
- "mapping": {
- "user": "#/components/schemas/UserMessage",
- "system": "#/components/schemas/SystemMessage",
- "tool": "#/components/schemas/ToolResponseMessage",
- "assistant": "#/components/schemas/CompletionMessage"
- }
- }
- },
"ToolCall": {
"type": "object",
"properties": {
@@ -4352,6 +3219,60 @@
"title": "ToolResponseMessage",
"description": "A message representing the result of a tool invocation."
},
+ "TopKSamplingStrategy": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "top_k",
+ "default": "top_k"
+ },
+ "top_k": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "top_k"
+ ],
+ "title": "TopKSamplingStrategy"
+ },
+ "TopPSamplingStrategy": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "top_p",
+ "default": "top_p"
+ },
+ "temperature": {
+ "type": "number"
+ },
+ "top_p": {
+ "type": "number",
+ "default": 0.95
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ],
+ "title": "TopPSamplingStrategy"
+ },
+ "URL": {
+ "type": "object",
+ "properties": {
+ "uri": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "uri"
+ ],
+ "title": "URL"
+ },
"UserMessage": {
"type": "object",
"properties": {
@@ -4675,6 +3596,51 @@
],
"title": "CancelTrainingJobRequest"
},
+ "ToolConfig": {
+ "type": "object",
+ "properties": {
+ "tool_choice": {
+ "oneOf": [
+ {
+ "type": "string",
+ "enum": [
+ "auto",
+ "required",
+ "none"
+ ],
+ "title": "ToolChoice",
+ "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model."
+ },
+ {
+ "type": "string"
+ }
+ ],
+ "default": "auto",
+ "description": "(Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto."
+ },
+ "tool_prompt_format": {
+ "type": "string",
+ "enum": [
+ "json",
+ "function_tag",
+ "python_list"
+ ],
+ "description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls."
+ },
+ "system_message_behavior": {
+ "type": "string",
+ "enum": [
+ "append",
+ "replace"
+ ],
+ "description": "(Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string '{{function_definitions}}' to indicate where the function definitions should be inserted.",
+ "default": "append"
+ }
+ },
+ "additionalProperties": false,
+ "title": "ToolConfig",
+ "description": "Configuration for tool use."
+ },
"ChatCompletionRequest": {
"type": "object",
"properties": {
@@ -4983,6 +3949,227 @@
"title": "CompletionResponseStreamChunk",
"description": "A chunk of a streamed completion response."
},
+ "AgentConfig": {
+ "type": "object",
+ "properties": {
+ "sampling_params": {
+ "$ref": "#/components/schemas/SamplingParams"
+ },
+ "input_shields": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "output_shields": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "toolgroups": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/AgentTool"
+ }
+ },
+ "client_tools": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ToolDef"
+ }
+ },
+ "tool_choice": {
+ "type": "string",
+ "enum": [
+ "auto",
+ "required",
+ "none"
+ ],
+ "title": "ToolChoice",
+ "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.",
+ "deprecated": true
+ },
+ "tool_prompt_format": {
+ "type": "string",
+ "enum": [
+ "json",
+ "function_tag",
+ "python_list"
+ ],
+ "title": "ToolPromptFormat",
+ "description": "Prompt format for calling custom / zero shot tools.",
+ "deprecated": true
+ },
+ "tool_config": {
+ "$ref": "#/components/schemas/ToolConfig"
+ },
+ "max_infer_iters": {
+ "type": "integer",
+ "default": 10
+ },
+ "model": {
+ "type": "string"
+ },
+ "instructions": {
+ "type": "string"
+ },
+ "enable_session_persistence": {
+ "type": "boolean",
+ "default": false
+ },
+ "response_format": {
+ "$ref": "#/components/schemas/ResponseFormat"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "model",
+ "instructions"
+ ],
+ "title": "AgentConfig"
+ },
+ "AgentTool": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "args": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "name",
+ "args"
+ ],
+ "title": "AgentToolGroupWithArgs"
+ }
+ ]
+ },
+ "ToolDef": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "description": {
+ "type": "string"
+ },
+ "parameters": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ToolParameter"
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "name"
+ ],
+ "title": "ToolDef"
+ },
+ "ToolParameter": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "parameter_type": {
+ "type": "string"
+ },
+ "description": {
+ "type": "string"
+ },
+ "required": {
+ "type": "boolean",
+ "default": true
+ },
+ "default": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "name",
+ "parameter_type",
+ "description",
+ "required"
+ ],
+ "title": "ToolParameter"
+ },
"CreateAgentRequest": {
"type": "object",
"properties": {
@@ -5836,6 +5023,204 @@
"title": "EmbeddingsResponse",
"description": "Response containing generated embeddings."
},
+ "AgentCandidate": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "agent",
+ "default": "agent"
+ },
+ "config": {
+ "$ref": "#/components/schemas/AgentConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "config"
+ ],
+ "title": "AgentCandidate"
+ },
+ "AggregationFunctionType": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType"
+ },
+ "BasicScoringFnParams": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "basic",
+ "default": "basic"
+ },
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/AggregationFunctionType"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ],
+ "title": "BasicScoringFnParams"
+ },
+ "BenchmarkConfig": {
+ "type": "object",
+ "properties": {
+ "eval_candidate": {
+ "$ref": "#/components/schemas/EvalCandidate"
+ },
+ "scoring_params": {
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/components/schemas/ScoringFnParams"
+ }
+ },
+ "num_examples": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "eval_candidate",
+ "scoring_params"
+ ],
+ "title": "BenchmarkConfig"
+ },
+ "EvalCandidate": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/ModelCandidate"
+ },
+ {
+ "$ref": "#/components/schemas/AgentCandidate"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "model": "#/components/schemas/ModelCandidate",
+ "agent": "#/components/schemas/AgentCandidate"
+ }
+ }
+ },
+ "LLMAsJudgeScoringFnParams": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "llm_as_judge",
+ "default": "llm_as_judge"
+ },
+ "judge_model": {
+ "type": "string"
+ },
+ "prompt_template": {
+ "type": "string"
+ },
+ "judge_score_regexes": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/AggregationFunctionType"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "judge_model"
+ ],
+ "title": "LLMAsJudgeScoringFnParams"
+ },
+ "ModelCandidate": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "model",
+ "default": "model"
+ },
+ "model": {
+ "type": "string"
+ },
+ "sampling_params": {
+ "$ref": "#/components/schemas/SamplingParams"
+ },
+ "system_message": {
+ "$ref": "#/components/schemas/SystemMessage"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "model",
+ "sampling_params"
+ ],
+ "title": "ModelCandidate"
+ },
+ "RegexParserScoringFnParams": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "regex_parser",
+ "default": "regex_parser"
+ },
+ "parsing_regexes": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/AggregationFunctionType"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ],
+ "title": "RegexParserScoringFnParams"
+ },
+ "ScoringFnParams": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
+ },
+ {
+ "$ref": "#/components/schemas/RegexParserScoringFnParams"
+ },
+ {
+ "$ref": "#/components/schemas/BasicScoringFnParams"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams",
+ "regex_parser": "#/components/schemas/RegexParserScoringFnParams",
+ "basic": "#/components/schemas/BasicScoringFnParams"
+ }
+ }
+ },
"EvaluateRowsRequest": {
"type": "object",
"properties": {
@@ -5885,6 +5270,115 @@
],
"title": "EvaluateRowsRequest"
},
+ "EvaluateResponse": {
+ "type": "object",
+ "properties": {
+ "generations": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "scores": {
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/components/schemas/ScoringResult"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "generations",
+ "scores"
+ ],
+ "title": "EvaluateResponse"
+ },
+ "ScoringResult": {
+ "type": "object",
+ "properties": {
+ "score_rows": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "aggregated_results": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "score_rows",
+ "aggregated_results"
+ ],
+ "title": "ScoringResult"
+ },
"Session": {
"type": "object",
"properties": {
@@ -5950,6 +5444,70 @@
],
"title": "AgentStepResponse"
},
+ "Benchmark": {
+ "type": "object",
+ "properties": {
+ "identifier": {
+ "type": "string"
+ },
+ "provider_resource_id": {
+ "type": "string"
+ },
+ "provider_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "benchmark",
+ "default": "benchmark"
+ },
+ "dataset_id": {
+ "type": "string"
+ },
+ "scoring_functions": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "identifier",
+ "provider_resource_id",
+ "provider_id",
+ "type",
+ "dataset_id",
+ "scoring_functions",
+ "metadata"
+ ],
+ "title": "Benchmark"
+ },
"AgentTurnInputType": {
"type": "object",
"properties": {
@@ -6769,6 +6327,16 @@
"title": "PostTrainingJobArtifactsResponse",
"description": "Artifacts of a finetuning job."
},
+ "JobStatus": {
+ "type": "string",
+ "enum": [
+ "completed",
+ "in_progress",
+ "failed",
+ "scheduled"
+ ],
+ "title": "JobStatus"
+ },
"PostTrainingJobStatusResponse": {
"type": "object",
"properties": {
@@ -7139,6 +6707,22 @@
"title": "ListBucketResponse",
"description": "Response representing a list of file entries."
},
+ "ListBenchmarksResponse": {
+ "type": "object",
+ "properties": {
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Benchmark"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "data"
+ ],
+ "title": "ListBenchmarksResponse"
+ },
"ListDatasetsResponse": {
"type": "object",
"properties": {
@@ -8436,6 +8020,19 @@
],
"title": "RunEvalRequest"
},
+ "Job": {
+ "type": "object",
+ "properties": {
+ "job_id": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "job_id"
+ ],
+ "title": "Job"
+ },
"RunShieldRequest": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index f79120f1d..f8d8ec5fe 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -10,175 +10,6 @@ info:
servers:
- url: http://any-hosted-llama-stack.com
paths:
- /v1/eval/tasks/{task_id}/evaluations:
- post:
- responses:
- '200':
- description: OK
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluateResponse'
- tags:
- - Eval
- description: ''
- parameters:
- - name: task_id
- in: path
- required: true
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/DeprecatedEvaluateRowsRequest'
- required: true
- deprecated: true
- /v1/eval-tasks/{eval_task_id}:
- get:
- responses:
- '200':
- description: OK
- content:
- application/json:
- schema:
- oneOf:
- - $ref: '#/components/schemas/Benchmark'
- - type: 'null'
- tags:
- - Benchmarks
- description: ''
- parameters:
- - name: eval_task_id
- in: path
- required: true
- schema:
- type: string
- deprecated: true
- /v1/eval/tasks/{task_id}/jobs/{job_id}:
- get:
- responses:
- '200':
- description: OK
- content:
- application/json:
- schema:
- oneOf:
- - $ref: '#/components/schemas/JobStatus'
- - type: 'null'
- tags:
- - Eval
- description: ''
- parameters:
- - name: task_id
- in: path
- required: true
- schema:
- type: string
- - name: job_id
- in: path
- required: true
- schema:
- type: string
- deprecated: true
- delete:
- responses:
- '200':
- description: OK
- tags:
- - Eval
- description: ''
- parameters:
- - name: task_id
- in: path
- required: true
- schema:
- type: string
- - name: job_id
- in: path
- required: true
- schema:
- type: string
- deprecated: true
- /v1/eval/tasks/{task_id}/jobs/{job_id}/result:
- get:
- responses:
- '200':
- description: OK
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluateResponse'
- tags:
- - Eval
- description: ''
- parameters:
- - name: task_id
- in: path
- required: true
- schema:
- type: string
- - name: job_id
- in: path
- required: true
- schema:
- type: string
- deprecated: true
- /v1/eval-tasks:
- get:
- responses:
- '200':
- description: OK
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/ListBenchmarksResponse'
- tags:
- - Benchmarks
- description: ''
- parameters: []
- deprecated: true
- post:
- responses:
- '200':
- description: OK
- tags:
- - Benchmarks
- description: ''
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/DeprecatedRegisterEvalTaskRequest'
- required: true
- deprecated: true
- /v1/eval/tasks/{task_id}/jobs:
- post:
- responses:
- '200':
- description: OK
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/Job'
- tags:
- - Eval
- description: ''
- parameters:
- - name: task_id
- in: path
- required: true
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/DeprecatedRunEvalRequest'
- required: true
- deprecated: true
/v1/datasetio/rows:
get:
responses:
@@ -1758,157 +1589,67 @@ jsonSchemaDialect: >-
https://json-schema.org/draft/2020-12/schema
components:
schemas:
- AgentCandidate:
+ AppendRowsRequest:
type: object
properties:
- type:
+ dataset_id:
type: string
- const: agent
- default: agent
- config:
- $ref: '#/components/schemas/AgentConfig'
+ rows:
+ type: array
+ items:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
additionalProperties: false
required:
- - type
- - config
- title: AgentCandidate
- AgentConfig:
+ - dataset_id
+ - rows
+ title: AppendRowsRequest
+ CompletionMessage:
type: object
properties:
- sampling_params:
- $ref: '#/components/schemas/SamplingParams'
- input_shields:
- type: array
- items:
- type: string
- output_shields:
- type: array
- items:
- type: string
- toolgroups:
- type: array
- items:
- $ref: '#/components/schemas/AgentTool'
- client_tools:
- type: array
- items:
- $ref: '#/components/schemas/ToolDef'
- tool_choice:
+ role:
+ type: string
+ const: assistant
+ default: assistant
+ description: >-
+ Must be "assistant" to identify this as the model's response
+ content:
+ $ref: '#/components/schemas/InterleavedContent'
+ description: The content of the model's response
+ stop_reason:
type: string
enum:
- - auto
- - required
- - none
- title: ToolChoice
+ - end_of_turn
+ - end_of_message
+ - out_of_tokens
description: >-
- Whether tool use is required or automatic. This is a hint to the model
- which may not be followed. It depends on the Instruction Following capabilities
- of the model.
- deprecated: true
- tool_prompt_format:
- type: string
- enum:
- - json
- - function_tag
- - python_list
- title: ToolPromptFormat
- description: >-
- Prompt format for calling custom / zero shot tools.
- deprecated: true
- tool_config:
- $ref: '#/components/schemas/ToolConfig'
- max_infer_iters:
- type: integer
- default: 10
- model:
- type: string
- instructions:
- type: string
- enable_session_persistence:
- type: boolean
- default: false
- response_format:
- $ref: '#/components/schemas/ResponseFormat'
- additionalProperties: false
- required:
- - model
- - instructions
- title: AgentConfig
- AgentTool:
- oneOf:
- - type: string
- - type: object
- properties:
- name:
- type: string
- args:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- additionalProperties: false
- required:
- - name
- - args
- title: AgentToolGroupWithArgs
- AggregationFunctionType:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- BasicScoringFnParams:
- type: object
- properties:
- type:
- type: string
- const: basic
- default: basic
- aggregation_functions:
+ Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`:
+ The model finished generating the entire response. - `StopReason.end_of_message`:
+ The model finished generating but generated a partial response -- usually,
+ a tool call. The user may call the tool and continue the conversation
+ with the tool's response. - `StopReason.out_of_tokens`: The model ran
+ out of token budget.
+ tool_calls:
type: array
items:
- $ref: '#/components/schemas/AggregationFunctionType'
+ $ref: '#/components/schemas/ToolCall'
+ description: >-
+ List of tool calls. Each tool call is a ToolCall object.
additionalProperties: false
required:
- - type
- title: BasicScoringFnParams
- BenchmarkConfig:
- type: object
- properties:
- type:
- type: string
- const: benchmark
- default: benchmark
- eval_candidate:
- $ref: '#/components/schemas/EvalCandidate'
- scoring_params:
- type: object
- additionalProperties:
- $ref: '#/components/schemas/ScoringFnParams'
- num_examples:
- type: integer
- additionalProperties: false
- required:
- - type
- - eval_candidate
- - scoring_params
- title: BenchmarkConfig
- EvalCandidate:
- oneOf:
- - $ref: '#/components/schemas/ModelCandidate'
- - $ref: '#/components/schemas/AgentCandidate'
- discriminator:
- propertyName: type
- mapping:
- model: '#/components/schemas/ModelCandidate'
- agent: '#/components/schemas/AgentCandidate'
+ - role
+ - content
+ - stop_reason
+ title: CompletionMessage
+ description: >-
+ A message containing the model's (assistant) response in a chat conversation.
GrammarResponseFormat:
type: object
properties:
@@ -2023,68 +1764,19 @@ components:
title: JsonSchemaResponseFormat
description: >-
Configuration for JSON schema-guided response generation.
- LLMAsJudgeScoringFnParams:
- type: object
- properties:
- type:
- type: string
- const: llm_as_judge
- default: llm_as_judge
- judge_model:
- type: string
- prompt_template:
- type: string
- judge_score_regexes:
- type: array
- items:
- type: string
- aggregation_functions:
- type: array
- items:
- $ref: '#/components/schemas/AggregationFunctionType'
- additionalProperties: false
- required:
- - type
- - judge_model
- title: LLMAsJudgeScoringFnParams
- ModelCandidate:
- type: object
- properties:
- type:
- type: string
- const: model
- default: model
- model:
- type: string
- sampling_params:
- $ref: '#/components/schemas/SamplingParams'
- system_message:
- $ref: '#/components/schemas/SystemMessage'
- additionalProperties: false
- required:
- - type
- - model
- - sampling_params
- title: ModelCandidate
- RegexParserScoringFnParams:
- type: object
- properties:
- type:
- type: string
- const: regex_parser
- default: regex_parser
- parsing_regexes:
- type: array
- items:
- type: string
- aggregation_functions:
- type: array
- items:
- $ref: '#/components/schemas/AggregationFunctionType'
- additionalProperties: false
- required:
- - type
- title: RegexParserScoringFnParams
+ Message:
+ oneOf:
+ - $ref: '#/components/schemas/UserMessage'
+ - $ref: '#/components/schemas/SystemMessage'
+ - $ref: '#/components/schemas/ToolResponseMessage'
+ - $ref: '#/components/schemas/CompletionMessage'
+ discriminator:
+ propertyName: role
+ mapping:
+ user: '#/components/schemas/UserMessage'
+ system: '#/components/schemas/SystemMessage'
+ tool: '#/components/schemas/ToolResponseMessage'
+ assistant: '#/components/schemas/CompletionMessage'
ResponseFormat:
oneOf:
- $ref: '#/components/schemas/JsonSchemaResponseFormat'
@@ -2120,17 +1812,6 @@ components:
greedy: '#/components/schemas/GreedySamplingStrategy'
top_p: '#/components/schemas/TopPSamplingStrategy'
top_k: '#/components/schemas/TopKSamplingStrategy'
- ScoringFnParams:
- oneOf:
- - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
- - $ref: '#/components/schemas/RegexParserScoringFnParams'
- - $ref: '#/components/schemas/BasicScoringFnParams'
- discriminator:
- propertyName: type
- mapping:
- llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
- regex_parser: '#/components/schemas/RegexParserScoringFnParams'
- basic: '#/components/schemas/BasicScoringFnParams'
SystemMessage:
type: object
properties:
@@ -2171,407 +1852,6 @@ components:
- text
title: TextContentItem
description: A text content item
- ToolConfig:
- type: object
- properties:
- tool_choice:
- oneOf:
- - type: string
- enum:
- - auto
- - required
- - none
- title: ToolChoice
- description: >-
- Whether tool use is required or automatic. This is a hint to the model
- which may not be followed. It depends on the Instruction Following
- capabilities of the model.
- - type: string
- default: auto
- description: >-
- (Optional) Whether tool use is automatic, required, or none. Can also
- specify a tool name to use a specific tool. Defaults to ToolChoice.auto.
- tool_prompt_format:
- type: string
- enum:
- - json
- - function_tag
- - python_list
- description: >-
- (Optional) Instructs the model how to format tool calls. By default, Llama
- Stack will attempt to use a format that is best adapted to the model.
- - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
- - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
- tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python
- syntax -- a list of function calls.
- system_message_behavior:
- type: string
- enum:
- - append
- - replace
- description: >-
- (Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`:
- Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`:
- Replaces the default system prompt with the provided system message. The
- system message can include the string '{{function_definitions}}' to indicate
- where the function definitions should be inserted.
- default: append
- additionalProperties: false
- title: ToolConfig
- description: Configuration for tool use.
- ToolDef:
- type: object
- properties:
- name:
- type: string
- description:
- type: string
- parameters:
- type: array
- items:
- $ref: '#/components/schemas/ToolParameter'
- metadata:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- additionalProperties: false
- required:
- - name
- title: ToolDef
- ToolParameter:
- type: object
- properties:
- name:
- type: string
- parameter_type:
- type: string
- description:
- type: string
- required:
- type: boolean
- default: true
- default:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- additionalProperties: false
- required:
- - name
- - parameter_type
- - description
- - required
- title: ToolParameter
- TopKSamplingStrategy:
- type: object
- properties:
- type:
- type: string
- const: top_k
- default: top_k
- top_k:
- type: integer
- additionalProperties: false
- required:
- - type
- - top_k
- title: TopKSamplingStrategy
- TopPSamplingStrategy:
- type: object
- properties:
- type:
- type: string
- const: top_p
- default: top_p
- temperature:
- type: number
- top_p:
- type: number
- default: 0.95
- additionalProperties: false
- required:
- - type
- title: TopPSamplingStrategy
- URL:
- type: object
- properties:
- uri:
- type: string
- additionalProperties: false
- required:
- - uri
- title: URL
- DeprecatedEvaluateRowsRequest:
- type: object
- properties:
- input_rows:
- type: array
- items:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- scoring_functions:
- type: array
- items:
- type: string
- task_config:
- $ref: '#/components/schemas/BenchmarkConfig'
- additionalProperties: false
- required:
- - input_rows
- - scoring_functions
- - task_config
- title: DeprecatedEvaluateRowsRequest
- EvaluateResponse:
- type: object
- properties:
- generations:
- type: array
- items:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- scores:
- type: object
- additionalProperties:
- $ref: '#/components/schemas/ScoringResult'
- additionalProperties: false
- required:
- - generations
- - scores
- title: EvaluateResponse
- ScoringResult:
- type: object
- properties:
- score_rows:
- type: array
- items:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- aggregated_results:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- additionalProperties: false
- required:
- - score_rows
- - aggregated_results
- title: ScoringResult
- Benchmark:
- type: object
- properties:
- identifier:
- type: string
- provider_resource_id:
- type: string
- provider_id:
- type: string
- type:
- type: string
- const: benchmark
- default: benchmark
- dataset_id:
- type: string
- scoring_functions:
- type: array
- items:
- type: string
- metadata:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- additionalProperties: false
- required:
- - identifier
- - provider_resource_id
- - provider_id
- - type
- - dataset_id
- - scoring_functions
- - metadata
- title: Benchmark
- JobStatus:
- type: string
- enum:
- - completed
- - in_progress
- - failed
- - scheduled
- title: JobStatus
- ListBenchmarksResponse:
- type: object
- properties:
- data:
- type: array
- items:
- $ref: '#/components/schemas/Benchmark'
- additionalProperties: false
- required:
- - data
- title: ListBenchmarksResponse
- DeprecatedRegisterEvalTaskRequest:
- type: object
- properties:
- eval_task_id:
- type: string
- dataset_id:
- type: string
- scoring_functions:
- type: array
- items:
- type: string
- provider_benchmark_id:
- type: string
- provider_id:
- type: string
- metadata:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- additionalProperties: false
- required:
- - eval_task_id
- - dataset_id
- - scoring_functions
- title: DeprecatedRegisterEvalTaskRequest
- DeprecatedRunEvalRequest:
- type: object
- properties:
- task_config:
- $ref: '#/components/schemas/BenchmarkConfig'
- additionalProperties: false
- required:
- - task_config
- title: DeprecatedRunEvalRequest
- Job:
- type: object
- properties:
- job_id:
- type: string
- additionalProperties: false
- required:
- - job_id
- title: Job
- AppendRowsRequest:
- type: object
- properties:
- dataset_id:
- type: string
- rows:
- type: array
- items:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- additionalProperties: false
- required:
- - dataset_id
- - rows
- title: AppendRowsRequest
- CompletionMessage:
- type: object
- properties:
- role:
- type: string
- const: assistant
- default: assistant
- description: >-
- Must be "assistant" to identify this as the model's response
- content:
- $ref: '#/components/schemas/InterleavedContent'
- description: The content of the model's response
- stop_reason:
- type: string
- enum:
- - end_of_turn
- - end_of_message
- - out_of_tokens
- description: >-
- Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`:
- The model finished generating the entire response. - `StopReason.end_of_message`:
- The model finished generating but generated a partial response -- usually,
- a tool call. The user may call the tool and continue the conversation
- with the tool's response. - `StopReason.out_of_tokens`: The model ran
- out of token budget.
- tool_calls:
- type: array
- items:
- $ref: '#/components/schemas/ToolCall'
- description: >-
- List of tool calls. Each tool call is a ToolCall object.
- additionalProperties: false
- required:
- - role
- - content
- - stop_reason
- title: CompletionMessage
- description: >-
- A message containing the model's (assistant) response in a chat conversation.
- Message:
- oneOf:
- - $ref: '#/components/schemas/UserMessage'
- - $ref: '#/components/schemas/SystemMessage'
- - $ref: '#/components/schemas/ToolResponseMessage'
- - $ref: '#/components/schemas/CompletionMessage'
- discriminator:
- propertyName: role
- mapping:
- user: '#/components/schemas/UserMessage'
- system: '#/components/schemas/SystemMessage'
- tool: '#/components/schemas/ToolResponseMessage'
- assistant: '#/components/schemas/CompletionMessage'
ToolCall:
type: object
properties:
@@ -2699,6 +1979,45 @@ components:
title: ToolResponseMessage
description: >-
A message representing the result of a tool invocation.
+ TopKSamplingStrategy:
+ type: object
+ properties:
+ type:
+ type: string
+ const: top_k
+ default: top_k
+ top_k:
+ type: integer
+ additionalProperties: false
+ required:
+ - type
+ - top_k
+ title: TopKSamplingStrategy
+ TopPSamplingStrategy:
+ type: object
+ properties:
+ type:
+ type: string
+ const: top_p
+ default: top_p
+ temperature:
+ type: number
+ top_p:
+ type: number
+ default: 0.95
+ additionalProperties: false
+ required:
+ - type
+ title: TopPSamplingStrategy
+ URL:
+ type: object
+ properties:
+ uri:
+ type: string
+ additionalProperties: false
+ required:
+ - uri
+ title: URL
UserMessage:
type: object
properties:
@@ -2938,6 +2257,54 @@ components:
required:
- job_uuid
title: CancelTrainingJobRequest
+ ToolConfig:
+ type: object
+ properties:
+ tool_choice:
+ oneOf:
+ - type: string
+ enum:
+ - auto
+ - required
+ - none
+ title: ToolChoice
+ description: >-
+ Whether tool use is required or automatic. This is a hint to the model
+ which may not be followed. It depends on the Instruction Following
+ capabilities of the model.
+ - type: string
+ default: auto
+ description: >-
+ (Optional) Whether tool use is automatic, required, or none. Can also
+ specify a tool name to use a specific tool. Defaults to ToolChoice.auto.
+ tool_prompt_format:
+ type: string
+ enum:
+ - json
+ - function_tag
+ - python_list
+ description: >-
+ (Optional) Instructs the model how to format tool calls. By default, Llama
+ Stack will attempt to use a format that is best adapted to the model.
+ - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
+ - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
+ tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python
+ syntax -- a list of function calls.
+ system_message_behavior:
+ type: string
+ enum:
+ - append
+ - replace
+ description: >-
+ (Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`:
+ Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`:
+ Replaces the default system prompt with the provided system message. The
+ system message can include the string '{{function_definitions}}' to indicate
+ where the function definitions should be inserted.
+ default: append
+ additionalProperties: false
+ title: ToolConfig
+ description: Configuration for tool use.
ChatCompletionRequest:
type: object
properties:
@@ -3201,6 +2568,142 @@ components:
title: CompletionResponseStreamChunk
description: >-
A chunk of a streamed completion response.
+ AgentConfig:
+ type: object
+ properties:
+ sampling_params:
+ $ref: '#/components/schemas/SamplingParams'
+ input_shields:
+ type: array
+ items:
+ type: string
+ output_shields:
+ type: array
+ items:
+ type: string
+ toolgroups:
+ type: array
+ items:
+ $ref: '#/components/schemas/AgentTool'
+ client_tools:
+ type: array
+ items:
+ $ref: '#/components/schemas/ToolDef'
+ tool_choice:
+ type: string
+ enum:
+ - auto
+ - required
+ - none
+ title: ToolChoice
+ description: >-
+ Whether tool use is required or automatic. This is a hint to the model
+ which may not be followed. It depends on the Instruction Following capabilities
+ of the model.
+ deprecated: true
+ tool_prompt_format:
+ type: string
+ enum:
+ - json
+ - function_tag
+ - python_list
+ title: ToolPromptFormat
+ description: >-
+ Prompt format for calling custom / zero shot tools.
+ deprecated: true
+ tool_config:
+ $ref: '#/components/schemas/ToolConfig'
+ max_infer_iters:
+ type: integer
+ default: 10
+ model:
+ type: string
+ instructions:
+ type: string
+ enable_session_persistence:
+ type: boolean
+ default: false
+ response_format:
+ $ref: '#/components/schemas/ResponseFormat'
+ additionalProperties: false
+ required:
+ - model
+ - instructions
+ title: AgentConfig
+ AgentTool:
+ oneOf:
+ - type: string
+ - type: object
+ properties:
+ name:
+ type: string
+ args:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ additionalProperties: false
+ required:
+ - name
+ - args
+ title: AgentToolGroupWithArgs
+ ToolDef:
+ type: object
+ properties:
+ name:
+ type: string
+ description:
+ type: string
+ parameters:
+ type: array
+ items:
+ $ref: '#/components/schemas/ToolParameter'
+ metadata:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ additionalProperties: false
+ required:
+ - name
+ title: ToolDef
+ ToolParameter:
+ type: object
+ properties:
+ name:
+ type: string
+ parameter_type:
+ type: string
+ description:
+ type: string
+ required:
+ type: boolean
+ default: true
+ default:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ additionalProperties: false
+ required:
+ - name
+ - parameter_type
+ - description
+ - required
+ title: ToolParameter
CreateAgentRequest:
type: object
properties:
@@ -3789,6 +3292,141 @@ components:
title: EmbeddingsResponse
description: >-
Response containing generated embeddings.
+ AgentCandidate:
+ type: object
+ properties:
+ type:
+ type: string
+ const: agent
+ default: agent
+ config:
+ $ref: '#/components/schemas/AgentConfig'
+ additionalProperties: false
+ required:
+ - type
+ - config
+ title: AgentCandidate
+ AggregationFunctionType:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ BasicScoringFnParams:
+ type: object
+ properties:
+ type:
+ type: string
+ const: basic
+ default: basic
+ aggregation_functions:
+ type: array
+ items:
+ $ref: '#/components/schemas/AggregationFunctionType'
+ additionalProperties: false
+ required:
+ - type
+ title: BasicScoringFnParams
+ BenchmarkConfig:
+ type: object
+ properties:
+ eval_candidate:
+ $ref: '#/components/schemas/EvalCandidate'
+ scoring_params:
+ type: object
+ additionalProperties:
+ $ref: '#/components/schemas/ScoringFnParams'
+ num_examples:
+ type: integer
+ additionalProperties: false
+ required:
+ - eval_candidate
+ - scoring_params
+ title: BenchmarkConfig
+ EvalCandidate:
+ oneOf:
+ - $ref: '#/components/schemas/ModelCandidate'
+ - $ref: '#/components/schemas/AgentCandidate'
+ discriminator:
+ propertyName: type
+ mapping:
+ model: '#/components/schemas/ModelCandidate'
+ agent: '#/components/schemas/AgentCandidate'
+ LLMAsJudgeScoringFnParams:
+ type: object
+ properties:
+ type:
+ type: string
+ const: llm_as_judge
+ default: llm_as_judge
+ judge_model:
+ type: string
+ prompt_template:
+ type: string
+ judge_score_regexes:
+ type: array
+ items:
+ type: string
+ aggregation_functions:
+ type: array
+ items:
+ $ref: '#/components/schemas/AggregationFunctionType'
+ additionalProperties: false
+ required:
+ - type
+ - judge_model
+ title: LLMAsJudgeScoringFnParams
+ ModelCandidate:
+ type: object
+ properties:
+ type:
+ type: string
+ const: model
+ default: model
+ model:
+ type: string
+ sampling_params:
+ $ref: '#/components/schemas/SamplingParams'
+ system_message:
+ $ref: '#/components/schemas/SystemMessage'
+ additionalProperties: false
+ required:
+ - type
+ - model
+ - sampling_params
+ title: ModelCandidate
+ RegexParserScoringFnParams:
+ type: object
+ properties:
+ type:
+ type: string
+ const: regex_parser
+ default: regex_parser
+ parsing_regexes:
+ type: array
+ items:
+ type: string
+ aggregation_functions:
+ type: array
+ items:
+ $ref: '#/components/schemas/AggregationFunctionType'
+ additionalProperties: false
+ required:
+ - type
+ title: RegexParserScoringFnParams
+ ScoringFnParams:
+ oneOf:
+ - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+ - $ref: '#/components/schemas/RegexParserScoringFnParams'
+ - $ref: '#/components/schemas/BasicScoringFnParams'
+ discriminator:
+ propertyName: type
+ mapping:
+ llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+ regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+ basic: '#/components/schemas/BasicScoringFnParams'
EvaluateRowsRequest:
type: object
properties:
@@ -3816,6 +3454,60 @@ components:
- scoring_functions
- task_config
title: EvaluateRowsRequest
+ EvaluateResponse:
+ type: object
+ properties:
+ generations:
+ type: array
+ items:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ scores:
+ type: object
+ additionalProperties:
+ $ref: '#/components/schemas/ScoringResult'
+ additionalProperties: false
+ required:
+ - generations
+ - scores
+ title: EvaluateResponse
+ ScoringResult:
+ type: object
+ properties:
+ score_rows:
+ type: array
+ items:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ aggregated_results:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ additionalProperties: false
+ required:
+ - score_rows
+ - aggregated_results
+ title: ScoringResult
Session:
type: object
properties:
@@ -3859,6 +3551,45 @@ components:
required:
- step
title: AgentStepResponse
+ Benchmark:
+ type: object
+ properties:
+ identifier:
+ type: string
+ provider_resource_id:
+ type: string
+ provider_id:
+ type: string
+ type:
+ type: string
+ const: benchmark
+ default: benchmark
+ dataset_id:
+ type: string
+ scoring_functions:
+ type: array
+ items:
+ type: string
+ metadata:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ additionalProperties: false
+ required:
+ - identifier
+ - provider_resource_id
+ - provider_id
+ - type
+ - dataset_id
+ - scoring_functions
+ - metadata
+ title: Benchmark
AgentTurnInputType:
type: object
properties:
@@ -4375,6 +4106,14 @@ components:
- checkpoints
title: PostTrainingJobArtifactsResponse
description: Artifacts of a finetuning job.
+ JobStatus:
+ type: string
+ enum:
+ - completed
+ - in_progress
+ - failed
+ - scheduled
+ title: JobStatus
PostTrainingJobStatusResponse:
type: object
properties:
@@ -4603,6 +4342,17 @@ components:
title: ListBucketResponse
description: >-
Response representing a list of file entries.
+ ListBenchmarksResponse:
+ type: object
+ properties:
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/Benchmark'
+ additionalProperties: false
+ required:
+ - data
+ title: ListBenchmarksResponse
ListDatasetsResponse:
type: object
properties:
@@ -5429,6 +5179,15 @@ components:
required:
- task_config
title: RunEvalRequest
+ Job:
+ type: object
+ properties:
+ job_id:
+ type: string
+ additionalProperties: false
+ required:
+ - job_id
+ title: Job
RunShieldRequest:
type: object
properties:
diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
index 8eecf84ab..f3f41b18a 100644
--- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
+++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
@@ -1017,14 +1017,14 @@
" \"content\": SYSTEM_PROMPT_TEMPLATE.format(subject=subset),\n",
"}\n",
"\n",
- "client.eval_tasks.register(\n",
- " eval_task_id=\"meta-reference::mmmu\",\n",
+ "client.benchmarks.register(\n",
+ " benchmark_id=\"meta-reference::mmmu\",\n",
" dataset_id=f\"mmmu-{subset}-{split}\",\n",
" scoring_functions=[\"basic::regex_parser_multiple_choice_answer\"],\n",
")\n",
"\n",
- "response = client.eval.evaluate_rows(\n",
- " task_id=\"meta-reference::mmmu\",\n",
+ "response = client.eval.evaluate_rows_alpha(\n",
+ " benchmark_id=\"meta-reference::mmmu\",\n",
" input_rows=eval_rows,\n",
" scoring_functions=[\"basic::regex_parser_multiple_choice_answer\"],\n",
" task_config={\n",
@@ -1196,14 +1196,14 @@
" provider_id=\"together\",\n",
")\n",
"\n",
- "client.eval_tasks.register(\n",
- " eval_task_id=\"meta-reference::simpleqa\",\n",
+ "client.benchmarks.register(\n",
+ " benchmark_id=\"meta-reference::simpleqa\",\n",
" dataset_id=simpleqa_dataset_id,\n",
" scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
")\n",
"\n",
- "response = client.eval.evaluate_rows(\n",
- " task_id=\"meta-reference::simpleqa\",\n",
+ "response = client.eval.evaluate_rows_alpha(\n",
+ " benchmark_id=\"meta-reference::simpleqa\",\n",
" input_rows=eval_rows.rows,\n",
" scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
" task_config={\n",
@@ -1351,8 +1351,8 @@
" \"enable_session_persistence\": False,\n",
"}\n",
"\n",
- "response = client.eval.evaluate_rows(\n",
- " task_id=\"meta-reference::simpleqa\",\n",
+ "response = client.eval.evaluate_rows_alpha(\n",
+ " benchmark_id=\"meta-reference::simpleqa\",\n",
" input_rows=eval_rows.rows,\n",
" scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
" task_config={\n",
diff --git a/llama_stack/apis/benchmarks/benchmarks.py b/llama_stack/apis/benchmarks/benchmarks.py
index 91b1ca927..39ba355e9 100644
--- a/llama_stack/apis/benchmarks/benchmarks.py
+++ b/llama_stack/apis/benchmarks/benchmarks.py
@@ -64,23 +64,3 @@ class Benchmarks(Protocol):
provider_id: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> None: ...
-
- @webmethod(route="/eval-tasks", method="GET")
- async def DEPRECATED_list_eval_tasks(self) -> ListBenchmarksResponse: ...
-
- @webmethod(route="/eval-tasks/{eval_task_id}", method="GET")
- async def DEPRECATED_get_eval_task(
- self,
- eval_task_id: str,
- ) -> Optional[Benchmark]: ...
-
- @webmethod(route="/eval-tasks", method="POST")
- async def DEPRECATED_register_eval_task(
- self,
- eval_task_id: str,
- dataset_id: str,
- scoring_functions: List[str],
- provider_benchmark_id: Optional[str] = None,
- provider_id: Optional[str] = None,
- metadata: Optional[Dict[str, Any]] = None,
- ) -> None: ...
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index e2ff4458e..a7b2e7670 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -39,7 +39,6 @@ EvalCandidate = register_schema(
@json_schema_type
class BenchmarkConfig(BaseModel):
- type: Literal["benchmark"] = "benchmark"
eval_candidate: EvalCandidate
scoring_params: Dict[str, ScoringFnParams] = Field(
description="Map between scoring function id and parameters for each scoring function you want to run",
@@ -84,28 +83,3 @@ class Eval(Protocol):
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: ...
-
- @webmethod(route="/eval/tasks/{task_id}/jobs", method="POST")
- async def DEPRECATED_run_eval(
- self,
- task_id: str,
- task_config: BenchmarkConfig,
- ) -> Job: ...
-
- @webmethod(route="/eval/tasks/{task_id}/evaluations", method="POST")
- async def DEPRECATED_evaluate_rows(
- self,
- task_id: str,
- input_rows: List[Dict[str, Any]],
- scoring_functions: List[str],
- task_config: BenchmarkConfig,
- ) -> EvaluateResponse: ...
-
- @webmethod(route="/eval/tasks/{task_id}/jobs/{job_id}", method="GET")
- async def DEPRECATED_job_status(self, task_id: str, job_id: str) -> Optional[JobStatus]: ...
-
- @webmethod(route="/eval/tasks/{task_id}/jobs/{job_id}", method="DELETE")
- async def DEPRECATED_job_cancel(self, task_id: str, job_id: str) -> None: ...
-
- @webmethod(route="/eval/tasks/{task_id}/jobs/{job_id}/result", method="GET")
- async def DEPRECATED_job_result(self, task_id: str, job_id: str) -> EvaluateResponse: ...
diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py
index 9d12c8a40..016ca4984 100644
--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@@ -411,48 +411,6 @@ class EvalRouter(Eval):
job_id,
)
- async def DEPRECATED_run_eval(
- self,
- task_id: str,
- task_config: BenchmarkConfig,
- ) -> Job:
- return await self.run_eval(benchmark_id=task_id, task_config=task_config)
-
- async def DEPRECATED_evaluate_rows(
- self,
- task_id: str,
- input_rows: List[Dict[str, Any]],
- scoring_functions: List[str],
- task_config: BenchmarkConfig,
- ) -> EvaluateResponse:
- return await self.evaluate_rows(
- benchmark_id=task_id,
- input_rows=input_rows,
- scoring_functions=scoring_functions,
- task_config=task_config,
- )
-
- async def DEPRECATED_job_status(
- self,
- task_id: str,
- job_id: str,
- ) -> Optional[JobStatus]:
- return await self.job_status(benchmark_id=task_id, job_id=job_id)
-
- async def DEPRECATED_job_cancel(
- self,
- task_id: str,
- job_id: str,
- ) -> None:
- return await self.job_cancel(benchmark_id=task_id, job_id=job_id)
-
- async def DEPRECATED_job_result(
- self,
- task_id: str,
- job_id: str,
- ) -> EvaluateResponse:
- return await self.job_result(benchmark_id=task_id, job_id=job_id)
-
class ToolRuntimeRouter(ToolRuntime):
class RagToolImpl(RAGToolRuntime):
diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py
index 2cddc3970..c2434e517 100644
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@@ -468,35 +468,6 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
)
await self.register_object(benchmark)
- async def DEPRECATED_list_eval_tasks(self) -> ListBenchmarksResponse:
- logger.warning("DEPRECATED: Use /eval/benchmarks instead")
- return await self.list_benchmarks()
-
- async def DEPRECATED_get_eval_task(
- self,
- eval_task_id: str,
- ) -> Optional[Benchmark]:
- logger.warning("DEPRECATED: Use /eval/benchmarks instead")
- return await self.get_benchmark(eval_task_id)
-
- async def DEPRECATED_register_eval_task(
- self,
- eval_task_id: str,
- dataset_id: str,
- scoring_functions: List[str],
- provider_benchmark_id: Optional[str] = None,
- provider_id: Optional[str] = None,
- metadata: Optional[Dict[str, Any]] = None,
- ) -> None:
- logger.warning("DEPRECATED: Use /eval/benchmarks instead")
- return await self.register_benchmark(
- benchmark_id=eval_task_id,
- dataset_id=dataset_id,
- scoring_functions=scoring_functions,
- metadata=metadata,
- provider_benchmark_id=provider_benchmark_id,
- )
-
class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
async def list_tools(self, toolgroup_id: Optional[str] = None) -> ListToolsResponse:
diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py
index 0f77b7347..18d408a31 100644
--- a/llama_stack/providers/inline/eval/meta_reference/eval.py
+++ b/llama_stack/providers/inline/eval/meta_reference/eval.py
@@ -234,45 +234,3 @@ class MetaReferenceEvalImpl(
raise ValueError(f"Job is not completed, Status: {status.value}")
return self.jobs[job_id]
-
- async def DEPRECATED_run_eval(
- self,
- task_id: str,
- task_config: BenchmarkConfig,
- ) -> Job:
- return await self.run_eval(benchmark_id=task_id, task_config=task_config)
-
- async def DEPRECATED_evaluate_rows(
- self,
- task_id: str,
- input_rows: List[Dict[str, Any]],
- scoring_functions: List[str],
- task_config: BenchmarkConfig,
- ) -> EvaluateResponse:
- return await self.evaluate_rows(
- benchmark_id=task_id,
- input_rows=input_rows,
- scoring_functions=scoring_functions,
- task_config=task_config,
- )
-
- async def DEPRECATED_job_status(
- self,
- task_id: str,
- job_id: str,
- ) -> Optional[JobStatus]:
- return await self.job_status(benchmark_id=task_id, job_id=job_id)
-
- async def DEPRECATED_job_cancel(
- self,
- task_id: str,
- job_id: str,
- ) -> None:
- return await self.job_cancel(benchmark_id=task_id, job_id=job_id)
-
- async def DEPRECATED_job_result(
- self,
- task_id: str,
- job_id: str,
- ) -> EvaluateResponse:
- return await self.job_result(benchmark_id=task_id, job_id=job_id)