diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 02d05776d..6d199e29d 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -40,286 +40,6 @@ } ], "paths": { - "/v1/eval/tasks/{task_id}/evaluations": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateResponse" - } - } - } - } - }, - "tags": [ - "Eval" - ], - "description": "", - "parameters": [ - { - "name": "task_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DeprecatedEvaluateRowsRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, - "/v1/eval-tasks/{eval_task_id}": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/Benchmark" - }, - { - "type": "null" - } - ] - } - } - } - } - }, - "tags": [ - "Benchmarks" - ], - "description": "", - "parameters": [ - { - "name": "eval_task_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": true - } - }, - "/v1/eval/tasks/{task_id}/jobs/{job_id}": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/JobStatus" - }, - { - "type": "null" - } - ] - } - } - } - } - }, - "tags": [ - "Eval" - ], - "description": "", - "parameters": [ - { - "name": "task_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "job_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": true - }, - "delete": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "Eval" - ], - "description": "", - "parameters": [ - { - "name": "task_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "job_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": true - } - }, - "/v1/eval/tasks/{task_id}/jobs/{job_id}/result": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateResponse" - } - } - } - } - }, - "tags": [ - "Eval" - ], - "description": "", - "parameters": [ - { - "name": "task_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "job_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": true - } - }, - "/v1/eval-tasks": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListBenchmarksResponse" - } - } - } - } - }, - "tags": [ - "Benchmarks" - ], - "description": "", - "parameters": [], - "deprecated": true - }, - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "Benchmarks" - ], - "description": "", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DeprecatedRegisterEvalTaskRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, - "/v1/eval/tasks/{task_id}/jobs": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Job" - } - } - } - } - }, - "tags": [ - "Eval" - ], - "description": "", - "parameters": [ - { - "name": "task_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DeprecatedRunEvalRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, "/v1/datasetio/rows": { "get": { "responses": { @@ -2898,227 +2618,86 @@ "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", "components": { "schemas": { - "AgentCandidate": { + "AppendRowsRequest": { "type": "object", "properties": { - "type": { - "type": "string", - "const": "agent", - "default": "agent" - }, - "config": { - "$ref": "#/components/schemas/AgentConfig" - } - }, - "additionalProperties": false, - "required": [ - "type", - "config" - ], - "title": "AgentCandidate" - }, - "AgentConfig": { - "type": "object", - "properties": { - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "input_shields": { - "type": "array", - "items": { - "type": "string" - } - }, - "output_shields": { - "type": "array", - "items": { - "type": "string" - } - }, - "toolgroups": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AgentTool" - } - }, - "client_tools": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolDef" - } - }, - "tool_choice": { - "type": "string", - "enum": [ - "auto", - "required", - "none" - ], - "title": "ToolChoice", - "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.", - "deprecated": true - }, - "tool_prompt_format": { - "type": "string", - "enum": [ - "json", - "function_tag", - "python_list" - ], - "title": "ToolPromptFormat", - "description": "Prompt format for calling custom / zero shot tools.", - "deprecated": true - }, - "tool_config": { - "$ref": "#/components/schemas/ToolConfig" - }, - "max_infer_iters": { - "type": "integer", - "default": 10 - }, - "model": { + "dataset_id": { "type": "string" }, - "instructions": { - "type": "string" - }, - "enable_session_persistence": { - "type": "boolean", - "default": false - }, - "response_format": { - "$ref": "#/components/schemas/ResponseFormat" - } - }, - "additionalProperties": false, - "required": [ - "model", - "instructions" - ], - "title": "AgentConfig" - }, - "AgentTool": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "args": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } + "rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] } - }, - "additionalProperties": false, - "required": [ - "name", - "args" - ], - "title": "AgentToolGroupWithArgs" + } } - ] - }, - "AggregationFunctionType": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" + }, + "additionalProperties": false, + "required": [ + "dataset_id", + "rows" ], - "title": "AggregationFunctionType" + "title": "AppendRowsRequest" }, - "BasicScoringFnParams": { + "CompletionMessage": { "type": "object", "properties": { - "type": { + "role": { "type": "string", - "const": "basic", - "default": "basic" + "const": "assistant", + "default": "assistant", + "description": "Must be \"assistant\" to identify this as the model's response" }, - "aggregation_functions": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The content of the model's response" + }, + "stop_reason": { + "type": "string", + "enum": [ + "end_of_turn", + "end_of_message", + "out_of_tokens" + ], + "description": "Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`: The model finished generating the entire response. - `StopReason.end_of_message`: The model finished generating but generated a partial response -- usually, a tool call. The user may call the tool and continue the conversation with the tool's response. - `StopReason.out_of_tokens`: The model ran out of token budget." + }, + "tool_calls": { "type": "array", "items": { - "$ref": "#/components/schemas/AggregationFunctionType" - } + "$ref": "#/components/schemas/ToolCall" + }, + "description": "List of tool calls. Each tool call is a ToolCall object." } }, "additionalProperties": false, "required": [ - "type" + "role", + "content", + "stop_reason" ], - "title": "BasicScoringFnParams" - }, - "BenchmarkConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "benchmark", - "default": "benchmark" - }, - "eval_candidate": { - "$ref": "#/components/schemas/EvalCandidate" - }, - "scoring_params": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringFnParams" - } - }, - "num_examples": { - "type": "integer" - } - }, - "additionalProperties": false, - "required": [ - "type", - "eval_candidate", - "scoring_params" - ], - "title": "BenchmarkConfig" - }, - "EvalCandidate": { - "oneOf": [ - { - "$ref": "#/components/schemas/ModelCandidate" - }, - { - "$ref": "#/components/schemas/AgentCandidate" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "model": "#/components/schemas/ModelCandidate", - "agent": "#/components/schemas/AgentCandidate" - } - } + "title": "CompletionMessage", + "description": "A message containing the model's (assistant) response in a chat conversation." }, "GrammarResponseFormat": { "type": "object", @@ -3290,92 +2869,30 @@ "title": "JsonSchemaResponseFormat", "description": "Configuration for JSON schema-guided response generation." }, - "LLMAsJudgeScoringFnParams": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "llm_as_judge", - "default": "llm_as_judge" + "Message": { + "oneOf": [ + { + "$ref": "#/components/schemas/UserMessage" }, - "judge_model": { - "type": "string" - }, - "prompt_template": { - "type": "string" - }, - "judge_score_regexes": { - "type": "array", - "items": { - "type": "string" - } - }, - "aggregation_functions": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AggregationFunctionType" - } - } - }, - "additionalProperties": false, - "required": [ - "type", - "judge_model" - ], - "title": "LLMAsJudgeScoringFnParams" - }, - "ModelCandidate": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "model", - "default": "model" - }, - "model": { - "type": "string" - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "system_message": { + { "$ref": "#/components/schemas/SystemMessage" - } - }, - "additionalProperties": false, - "required": [ - "type", - "model", - "sampling_params" - ], - "title": "ModelCandidate" - }, - "RegexParserScoringFnParams": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "regex_parser", - "default": "regex_parser" }, - "parsing_regexes": { - "type": "array", - "items": { - "type": "string" - } + { + "$ref": "#/components/schemas/ToolResponseMessage" }, - "aggregation_functions": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AggregationFunctionType" - } + { + "$ref": "#/components/schemas/CompletionMessage" } - }, - "additionalProperties": false, - "required": [ - "type" ], - "title": "RegexParserScoringFnParams" + "discriminator": { + "propertyName": "role", + "mapping": { + "user": "#/components/schemas/UserMessage", + "system": "#/components/schemas/SystemMessage", + "tool": "#/components/schemas/ToolResponseMessage", + "assistant": "#/components/schemas/CompletionMessage" + } + } }, "ResponseFormat": { "oneOf": [ @@ -3436,27 +2953,6 @@ } } }, - "ScoringFnParams": { - "oneOf": [ - { - "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams" - }, - { - "$ref": "#/components/schemas/RegexParserScoringFnParams" - }, - { - "$ref": "#/components/schemas/BasicScoringFnParams" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams", - "regex_parser": "#/components/schemas/RegexParserScoringFnParams", - "basic": "#/components/schemas/BasicScoringFnParams" - } - } - }, "SystemMessage": { "type": "object", "properties": { @@ -3501,635 +2997,6 @@ "title": "TextContentItem", "description": "A text content item" }, - "ToolConfig": { - "type": "object", - "properties": { - "tool_choice": { - "oneOf": [ - { - "type": "string", - "enum": [ - "auto", - "required", - "none" - ], - "title": "ToolChoice", - "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model." - }, - { - "type": "string" - } - ], - "default": "auto", - "description": "(Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto." - }, - "tool_prompt_format": { - "type": "string", - "enum": [ - "json", - "function_tag", - "python_list" - ], - "description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls." - }, - "system_message_behavior": { - "type": "string", - "enum": [ - "append", - "replace" - ], - "description": "(Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string '{{function_definitions}}' to indicate where the function definitions should be inserted.", - "default": "append" - } - }, - "additionalProperties": false, - "title": "ToolConfig", - "description": "Configuration for tool use." - }, - "ToolDef": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "parameters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolParameter" - } - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "name" - ], - "title": "ToolDef" - }, - "ToolParameter": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "parameter_type": { - "type": "string" - }, - "description": { - "type": "string" - }, - "required": { - "type": "boolean", - "default": true - }, - "default": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "name", - "parameter_type", - "description", - "required" - ], - "title": "ToolParameter" - }, - "TopKSamplingStrategy": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "top_k", - "default": "top_k" - }, - "top_k": { - "type": "integer" - } - }, - "additionalProperties": false, - "required": [ - "type", - "top_k" - ], - "title": "TopKSamplingStrategy" - }, - "TopPSamplingStrategy": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "top_p", - "default": "top_p" - }, - "temperature": { - "type": "number" - }, - "top_p": { - "type": "number", - "default": 0.95 - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "TopPSamplingStrategy" - }, - "URL": { - "type": "object", - "properties": { - "uri": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "uri" - ], - "title": "URL" - }, - "DeprecatedEvaluateRowsRequest": { - "type": "object", - "properties": { - "input_rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "scoring_functions": { - "type": "array", - "items": { - "type": "string" - } - }, - "task_config": { - "$ref": "#/components/schemas/BenchmarkConfig" - } - }, - "additionalProperties": false, - "required": [ - "input_rows", - "scoring_functions", - "task_config" - ], - "title": "DeprecatedEvaluateRowsRequest" - }, - "EvaluateResponse": { - "type": "object", - "properties": { - "generations": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "scores": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringResult" - } - } - }, - "additionalProperties": false, - "required": [ - "generations", - "scores" - ], - "title": "EvaluateResponse" - }, - "ScoringResult": { - "type": "object", - "properties": { - "score_rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "aggregated_results": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "score_rows", - "aggregated_results" - ], - "title": "ScoringResult" - }, - "Benchmark": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "benchmark", - "default": "benchmark" - }, - "dataset_id": { - "type": "string" - }, - "scoring_functions": { - "type": "array", - "items": { - "type": "string" - } - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_resource_id", - "provider_id", - "type", - "dataset_id", - "scoring_functions", - "metadata" - ], - "title": "Benchmark" - }, - "JobStatus": { - "type": "string", - "enum": [ - "completed", - "in_progress", - "failed", - "scheduled" - ], - "title": "JobStatus" - }, - "ListBenchmarksResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Benchmark" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListBenchmarksResponse" - }, - "DeprecatedRegisterEvalTaskRequest": { - "type": "object", - "properties": { - "eval_task_id": { - "type": "string" - }, - "dataset_id": { - "type": "string" - }, - "scoring_functions": { - "type": "array", - "items": { - "type": "string" - } - }, - "provider_benchmark_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "eval_task_id", - "dataset_id", - "scoring_functions" - ], - "title": "DeprecatedRegisterEvalTaskRequest" - }, - "DeprecatedRunEvalRequest": { - "type": "object", - "properties": { - "task_config": { - "$ref": "#/components/schemas/BenchmarkConfig" - } - }, - "additionalProperties": false, - "required": [ - "task_config" - ], - "title": "DeprecatedRunEvalRequest" - }, - "Job": { - "type": "object", - "properties": { - "job_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_id" - ], - "title": "Job" - }, - "AppendRowsRequest": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string" - }, - "rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - } - }, - "additionalProperties": false, - "required": [ - "dataset_id", - "rows" - ], - "title": "AppendRowsRequest" - }, - "CompletionMessage": { - "type": "object", - "properties": { - "role": { - "type": "string", - "const": "assistant", - "default": "assistant", - "description": "Must be \"assistant\" to identify this as the model's response" - }, - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the model's response" - }, - "stop_reason": { - "type": "string", - "enum": [ - "end_of_turn", - "end_of_message", - "out_of_tokens" - ], - "description": "Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`: The model finished generating the entire response. - `StopReason.end_of_message`: The model finished generating but generated a partial response -- usually, a tool call. The user may call the tool and continue the conversation with the tool's response. - `StopReason.out_of_tokens`: The model ran out of token budget." - }, - "tool_calls": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolCall" - }, - "description": "List of tool calls. Each tool call is a ToolCall object." - } - }, - "additionalProperties": false, - "required": [ - "role", - "content", - "stop_reason" - ], - "title": "CompletionMessage", - "description": "A message containing the model's (assistant) response in a chat conversation." - }, - "Message": { - "oneOf": [ - { - "$ref": "#/components/schemas/UserMessage" - }, - { - "$ref": "#/components/schemas/SystemMessage" - }, - { - "$ref": "#/components/schemas/ToolResponseMessage" - }, - { - "$ref": "#/components/schemas/CompletionMessage" - } - ], - "discriminator": { - "propertyName": "role", - "mapping": { - "user": "#/components/schemas/UserMessage", - "system": "#/components/schemas/SystemMessage", - "tool": "#/components/schemas/ToolResponseMessage", - "assistant": "#/components/schemas/CompletionMessage" - } - } - }, "ToolCall": { "type": "object", "properties": { @@ -4352,6 +3219,60 @@ "title": "ToolResponseMessage", "description": "A message representing the result of a tool invocation." }, + "TopKSamplingStrategy": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "top_k", + "default": "top_k" + }, + "top_k": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "top_k" + ], + "title": "TopKSamplingStrategy" + }, + "TopPSamplingStrategy": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "top_p", + "default": "top_p" + }, + "temperature": { + "type": "number" + }, + "top_p": { + "type": "number", + "default": 0.95 + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "TopPSamplingStrategy" + }, + "URL": { + "type": "object", + "properties": { + "uri": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "uri" + ], + "title": "URL" + }, "UserMessage": { "type": "object", "properties": { @@ -4675,6 +3596,51 @@ ], "title": "CancelTrainingJobRequest" }, + "ToolConfig": { + "type": "object", + "properties": { + "tool_choice": { + "oneOf": [ + { + "type": "string", + "enum": [ + "auto", + "required", + "none" + ], + "title": "ToolChoice", + "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model." + }, + { + "type": "string" + } + ], + "default": "auto", + "description": "(Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto." + }, + "tool_prompt_format": { + "type": "string", + "enum": [ + "json", + "function_tag", + "python_list" + ], + "description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls." + }, + "system_message_behavior": { + "type": "string", + "enum": [ + "append", + "replace" + ], + "description": "(Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string '{{function_definitions}}' to indicate where the function definitions should be inserted.", + "default": "append" + } + }, + "additionalProperties": false, + "title": "ToolConfig", + "description": "Configuration for tool use." + }, "ChatCompletionRequest": { "type": "object", "properties": { @@ -4983,6 +3949,227 @@ "title": "CompletionResponseStreamChunk", "description": "A chunk of a streamed completion response." }, + "AgentConfig": { + "type": "object", + "properties": { + "sampling_params": { + "$ref": "#/components/schemas/SamplingParams" + }, + "input_shields": { + "type": "array", + "items": { + "type": "string" + } + }, + "output_shields": { + "type": "array", + "items": { + "type": "string" + } + }, + "toolgroups": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgentTool" + } + }, + "client_tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDef" + } + }, + "tool_choice": { + "type": "string", + "enum": [ + "auto", + "required", + "none" + ], + "title": "ToolChoice", + "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.", + "deprecated": true + }, + "tool_prompt_format": { + "type": "string", + "enum": [ + "json", + "function_tag", + "python_list" + ], + "title": "ToolPromptFormat", + "description": "Prompt format for calling custom / zero shot tools.", + "deprecated": true + }, + "tool_config": { + "$ref": "#/components/schemas/ToolConfig" + }, + "max_infer_iters": { + "type": "integer", + "default": 10 + }, + "model": { + "type": "string" + }, + "instructions": { + "type": "string" + }, + "enable_session_persistence": { + "type": "boolean", + "default": false + }, + "response_format": { + "$ref": "#/components/schemas/ResponseFormat" + } + }, + "additionalProperties": false, + "required": [ + "model", + "instructions" + ], + "title": "AgentConfig" + }, + "AgentTool": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "args": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "name", + "args" + ], + "title": "AgentToolGroupWithArgs" + } + ] + }, + "ToolDef": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolParameter" + } + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "name" + ], + "title": "ToolDef" + }, + "ToolParameter": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "parameter_type": { + "type": "string" + }, + "description": { + "type": "string" + }, + "required": { + "type": "boolean", + "default": true + }, + "default": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "additionalProperties": false, + "required": [ + "name", + "parameter_type", + "description", + "required" + ], + "title": "ToolParameter" + }, "CreateAgentRequest": { "type": "object", "properties": { @@ -5836,6 +5023,204 @@ "title": "EmbeddingsResponse", "description": "Response containing generated embeddings." }, + "AgentCandidate": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "agent", + "default": "agent" + }, + "config": { + "$ref": "#/components/schemas/AgentConfig" + } + }, + "additionalProperties": false, + "required": [ + "type", + "config" + ], + "title": "AgentCandidate" + }, + "AggregationFunctionType": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType" + }, + "BasicScoringFnParams": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "basic", + "default": "basic" + }, + "aggregation_functions": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AggregationFunctionType" + } + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "BasicScoringFnParams" + }, + "BenchmarkConfig": { + "type": "object", + "properties": { + "eval_candidate": { + "$ref": "#/components/schemas/EvalCandidate" + }, + "scoring_params": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ScoringFnParams" + } + }, + "num_examples": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "eval_candidate", + "scoring_params" + ], + "title": "BenchmarkConfig" + }, + "EvalCandidate": { + "oneOf": [ + { + "$ref": "#/components/schemas/ModelCandidate" + }, + { + "$ref": "#/components/schemas/AgentCandidate" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "model": "#/components/schemas/ModelCandidate", + "agent": "#/components/schemas/AgentCandidate" + } + } + }, + "LLMAsJudgeScoringFnParams": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "llm_as_judge", + "default": "llm_as_judge" + }, + "judge_model": { + "type": "string" + }, + "prompt_template": { + "type": "string" + }, + "judge_score_regexes": { + "type": "array", + "items": { + "type": "string" + } + }, + "aggregation_functions": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AggregationFunctionType" + } + } + }, + "additionalProperties": false, + "required": [ + "type", + "judge_model" + ], + "title": "LLMAsJudgeScoringFnParams" + }, + "ModelCandidate": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "model", + "default": "model" + }, + "model": { + "type": "string" + }, + "sampling_params": { + "$ref": "#/components/schemas/SamplingParams" + }, + "system_message": { + "$ref": "#/components/schemas/SystemMessage" + } + }, + "additionalProperties": false, + "required": [ + "type", + "model", + "sampling_params" + ], + "title": "ModelCandidate" + }, + "RegexParserScoringFnParams": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "regex_parser", + "default": "regex_parser" + }, + "parsing_regexes": { + "type": "array", + "items": { + "type": "string" + } + }, + "aggregation_functions": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AggregationFunctionType" + } + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "RegexParserScoringFnParams" + }, + "ScoringFnParams": { + "oneOf": [ + { + "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams" + }, + { + "$ref": "#/components/schemas/RegexParserScoringFnParams" + }, + { + "$ref": "#/components/schemas/BasicScoringFnParams" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams", + "regex_parser": "#/components/schemas/RegexParserScoringFnParams", + "basic": "#/components/schemas/BasicScoringFnParams" + } + } + }, "EvaluateRowsRequest": { "type": "object", "properties": { @@ -5885,6 +5270,115 @@ ], "title": "EvaluateRowsRequest" }, + "EvaluateResponse": { + "type": "object", + "properties": { + "generations": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "scores": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ScoringResult" + } + } + }, + "additionalProperties": false, + "required": [ + "generations", + "scores" + ], + "title": "EvaluateResponse" + }, + "ScoringResult": { + "type": "object", + "properties": { + "score_rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "aggregated_results": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "score_rows", + "aggregated_results" + ], + "title": "ScoringResult" + }, "Session": { "type": "object", "properties": { @@ -5950,6 +5444,70 @@ ], "title": "AgentStepResponse" }, + "Benchmark": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "const": "benchmark", + "default": "benchmark" + }, + "dataset_id": { + "type": "string" + }, + "scoring_functions": { + "type": "array", + "items": { + "type": "string" + } + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_resource_id", + "provider_id", + "type", + "dataset_id", + "scoring_functions", + "metadata" + ], + "title": "Benchmark" + }, "AgentTurnInputType": { "type": "object", "properties": { @@ -6769,6 +6327,16 @@ "title": "PostTrainingJobArtifactsResponse", "description": "Artifacts of a finetuning job." }, + "JobStatus": { + "type": "string", + "enum": [ + "completed", + "in_progress", + "failed", + "scheduled" + ], + "title": "JobStatus" + }, "PostTrainingJobStatusResponse": { "type": "object", "properties": { @@ -7139,6 +6707,22 @@ "title": "ListBucketResponse", "description": "Response representing a list of file entries." }, + "ListBenchmarksResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Benchmark" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListBenchmarksResponse" + }, "ListDatasetsResponse": { "type": "object", "properties": { @@ -8436,6 +8020,19 @@ ], "title": "RunEvalRequest" }, + "Job": { + "type": "object", + "properties": { + "job_id": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_id" + ], + "title": "Job" + }, "RunShieldRequest": { "type": "object", "properties": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index f79120f1d..f8d8ec5fe 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -10,175 +10,6 @@ info: servers: - url: http://any-hosted-llama-stack.com paths: - /v1/eval/tasks/{task_id}/evaluations: - post: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateResponse' - tags: - - Eval - description: '' - parameters: - - name: task_id - in: path - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/DeprecatedEvaluateRowsRequest' - required: true - deprecated: true - /v1/eval-tasks/{eval_task_id}: - get: - responses: - '200': - description: OK - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/Benchmark' - - type: 'null' - tags: - - Benchmarks - description: '' - parameters: - - name: eval_task_id - in: path - required: true - schema: - type: string - deprecated: true - /v1/eval/tasks/{task_id}/jobs/{job_id}: - get: - responses: - '200': - description: OK - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/JobStatus' - - type: 'null' - tags: - - Eval - description: '' - parameters: - - name: task_id - in: path - required: true - schema: - type: string - - name: job_id - in: path - required: true - schema: - type: string - deprecated: true - delete: - responses: - '200': - description: OK - tags: - - Eval - description: '' - parameters: - - name: task_id - in: path - required: true - schema: - type: string - - name: job_id - in: path - required: true - schema: - type: string - deprecated: true - /v1/eval/tasks/{task_id}/jobs/{job_id}/result: - get: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateResponse' - tags: - - Eval - description: '' - parameters: - - name: task_id - in: path - required: true - schema: - type: string - - name: job_id - in: path - required: true - schema: - type: string - deprecated: true - /v1/eval-tasks: - get: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/ListBenchmarksResponse' - tags: - - Benchmarks - description: '' - parameters: [] - deprecated: true - post: - responses: - '200': - description: OK - tags: - - Benchmarks - description: '' - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/DeprecatedRegisterEvalTaskRequest' - required: true - deprecated: true - /v1/eval/tasks/{task_id}/jobs: - post: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/Job' - tags: - - Eval - description: '' - parameters: - - name: task_id - in: path - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/DeprecatedRunEvalRequest' - required: true - deprecated: true /v1/datasetio/rows: get: responses: @@ -1758,157 +1589,67 @@ jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema components: schemas: - AgentCandidate: + AppendRowsRequest: type: object properties: - type: + dataset_id: type: string - const: agent - default: agent - config: - $ref: '#/components/schemas/AgentConfig' + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object additionalProperties: false required: - - type - - config - title: AgentCandidate - AgentConfig: + - dataset_id + - rows + title: AppendRowsRequest + CompletionMessage: type: object properties: - sampling_params: - $ref: '#/components/schemas/SamplingParams' - input_shields: - type: array - items: - type: string - output_shields: - type: array - items: - type: string - toolgroups: - type: array - items: - $ref: '#/components/schemas/AgentTool' - client_tools: - type: array - items: - $ref: '#/components/schemas/ToolDef' - tool_choice: + role: + type: string + const: assistant + default: assistant + description: >- + Must be "assistant" to identify this as the model's response + content: + $ref: '#/components/schemas/InterleavedContent' + description: The content of the model's response + stop_reason: type: string enum: - - auto - - required - - none - title: ToolChoice + - end_of_turn + - end_of_message + - out_of_tokens description: >- - Whether tool use is required or automatic. This is a hint to the model - which may not be followed. It depends on the Instruction Following capabilities - of the model. - deprecated: true - tool_prompt_format: - type: string - enum: - - json - - function_tag - - python_list - title: ToolPromptFormat - description: >- - Prompt format for calling custom / zero shot tools. - deprecated: true - tool_config: - $ref: '#/components/schemas/ToolConfig' - max_infer_iters: - type: integer - default: 10 - model: - type: string - instructions: - type: string - enable_session_persistence: - type: boolean - default: false - response_format: - $ref: '#/components/schemas/ResponseFormat' - additionalProperties: false - required: - - model - - instructions - title: AgentConfig - AgentTool: - oneOf: - - type: string - - type: object - properties: - name: - type: string - args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - name - - args - title: AgentToolGroupWithArgs - AggregationFunctionType: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - BasicScoringFnParams: - type: object - properties: - type: - type: string - const: basic - default: basic - aggregation_functions: + Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`: + The model finished generating the entire response. - `StopReason.end_of_message`: + The model finished generating but generated a partial response -- usually, + a tool call. The user may call the tool and continue the conversation + with the tool's response. - `StopReason.out_of_tokens`: The model ran + out of token budget. + tool_calls: type: array items: - $ref: '#/components/schemas/AggregationFunctionType' + $ref: '#/components/schemas/ToolCall' + description: >- + List of tool calls. Each tool call is a ToolCall object. additionalProperties: false required: - - type - title: BasicScoringFnParams - BenchmarkConfig: - type: object - properties: - type: - type: string - const: benchmark - default: benchmark - eval_candidate: - $ref: '#/components/schemas/EvalCandidate' - scoring_params: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringFnParams' - num_examples: - type: integer - additionalProperties: false - required: - - type - - eval_candidate - - scoring_params - title: BenchmarkConfig - EvalCandidate: - oneOf: - - $ref: '#/components/schemas/ModelCandidate' - - $ref: '#/components/schemas/AgentCandidate' - discriminator: - propertyName: type - mapping: - model: '#/components/schemas/ModelCandidate' - agent: '#/components/schemas/AgentCandidate' + - role + - content + - stop_reason + title: CompletionMessage + description: >- + A message containing the model's (assistant) response in a chat conversation. GrammarResponseFormat: type: object properties: @@ -2023,68 +1764,19 @@ components: title: JsonSchemaResponseFormat description: >- Configuration for JSON schema-guided response generation. - LLMAsJudgeScoringFnParams: - type: object - properties: - type: - type: string - const: llm_as_judge - default: llm_as_judge - judge_model: - type: string - prompt_template: - type: string - judge_score_regexes: - type: array - items: - type: string - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - additionalProperties: false - required: - - type - - judge_model - title: LLMAsJudgeScoringFnParams - ModelCandidate: - type: object - properties: - type: - type: string - const: model - default: model - model: - type: string - sampling_params: - $ref: '#/components/schemas/SamplingParams' - system_message: - $ref: '#/components/schemas/SystemMessage' - additionalProperties: false - required: - - type - - model - - sampling_params - title: ModelCandidate - RegexParserScoringFnParams: - type: object - properties: - type: - type: string - const: regex_parser - default: regex_parser - parsing_regexes: - type: array - items: - type: string - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - additionalProperties: false - required: - - type - title: RegexParserScoringFnParams + Message: + oneOf: + - $ref: '#/components/schemas/UserMessage' + - $ref: '#/components/schemas/SystemMessage' + - $ref: '#/components/schemas/ToolResponseMessage' + - $ref: '#/components/schemas/CompletionMessage' + discriminator: + propertyName: role + mapping: + user: '#/components/schemas/UserMessage' + system: '#/components/schemas/SystemMessage' + tool: '#/components/schemas/ToolResponseMessage' + assistant: '#/components/schemas/CompletionMessage' ResponseFormat: oneOf: - $ref: '#/components/schemas/JsonSchemaResponseFormat' @@ -2120,17 +1812,6 @@ components: greedy: '#/components/schemas/GreedySamplingStrategy' top_p: '#/components/schemas/TopPSamplingStrategy' top_k: '#/components/schemas/TopKSamplingStrategy' - ScoringFnParams: - oneOf: - - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - - $ref: '#/components/schemas/RegexParserScoringFnParams' - - $ref: '#/components/schemas/BasicScoringFnParams' - discriminator: - propertyName: type - mapping: - llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' - regex_parser: '#/components/schemas/RegexParserScoringFnParams' - basic: '#/components/schemas/BasicScoringFnParams' SystemMessage: type: object properties: @@ -2171,407 +1852,6 @@ components: - text title: TextContentItem description: A text content item - ToolConfig: - type: object - properties: - tool_choice: - oneOf: - - type: string - enum: - - auto - - required - - none - title: ToolChoice - description: >- - Whether tool use is required or automatic. This is a hint to the model - which may not be followed. It depends on the Instruction Following - capabilities of the model. - - type: string - default: auto - description: >- - (Optional) Whether tool use is automatic, required, or none. Can also - specify a tool name to use a specific tool. Defaults to ToolChoice.auto. - tool_prompt_format: - type: string - enum: - - json - - function_tag - - python_list - description: >- - (Optional) Instructs the model how to format tool calls. By default, Llama - Stack will attempt to use a format that is best adapted to the model. - - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python - syntax -- a list of function calls. - system_message_behavior: - type: string - enum: - - append - - replace - description: >- - (Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: - Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: - Replaces the default system prompt with the provided system message. The - system message can include the string '{{function_definitions}}' to indicate - where the function definitions should be inserted. - default: append - additionalProperties: false - title: ToolConfig - description: Configuration for tool use. - ToolDef: - type: object - properties: - name: - type: string - description: - type: string - parameters: - type: array - items: - $ref: '#/components/schemas/ToolParameter' - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - name - title: ToolDef - ToolParameter: - type: object - properties: - name: - type: string - parameter_type: - type: string - description: - type: string - required: - type: boolean - default: true - default: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - name - - parameter_type - - description - - required - title: ToolParameter - TopKSamplingStrategy: - type: object - properties: - type: - type: string - const: top_k - default: top_k - top_k: - type: integer - additionalProperties: false - required: - - type - - top_k - title: TopKSamplingStrategy - TopPSamplingStrategy: - type: object - properties: - type: - type: string - const: top_p - default: top_p - temperature: - type: number - top_p: - type: number - default: 0.95 - additionalProperties: false - required: - - type - title: TopPSamplingStrategy - URL: - type: object - properties: - uri: - type: string - additionalProperties: false - required: - - uri - title: URL - DeprecatedEvaluateRowsRequest: - type: object - properties: - input_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - scoring_functions: - type: array - items: - type: string - task_config: - $ref: '#/components/schemas/BenchmarkConfig' - additionalProperties: false - required: - - input_rows - - scoring_functions - - task_config - title: DeprecatedEvaluateRowsRequest - EvaluateResponse: - type: object - properties: - generations: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - scores: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringResult' - additionalProperties: false - required: - - generations - - scores - title: EvaluateResponse - ScoringResult: - type: object - properties: - score_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - aggregated_results: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - score_rows - - aggregated_results - title: ScoringResult - Benchmark: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - const: benchmark - default: benchmark - dataset_id: - type: string - scoring_functions: - type: array - items: - type: string - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - identifier - - provider_resource_id - - provider_id - - type - - dataset_id - - scoring_functions - - metadata - title: Benchmark - JobStatus: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - title: JobStatus - ListBenchmarksResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Benchmark' - additionalProperties: false - required: - - data - title: ListBenchmarksResponse - DeprecatedRegisterEvalTaskRequest: - type: object - properties: - eval_task_id: - type: string - dataset_id: - type: string - scoring_functions: - type: array - items: - type: string - provider_benchmark_id: - type: string - provider_id: - type: string - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - eval_task_id - - dataset_id - - scoring_functions - title: DeprecatedRegisterEvalTaskRequest - DeprecatedRunEvalRequest: - type: object - properties: - task_config: - $ref: '#/components/schemas/BenchmarkConfig' - additionalProperties: false - required: - - task_config - title: DeprecatedRunEvalRequest - Job: - type: object - properties: - job_id: - type: string - additionalProperties: false - required: - - job_id - title: Job - AppendRowsRequest: - type: object - properties: - dataset_id: - type: string - rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - dataset_id - - rows - title: AppendRowsRequest - CompletionMessage: - type: object - properties: - role: - type: string - const: assistant - default: assistant - description: >- - Must be "assistant" to identify this as the model's response - content: - $ref: '#/components/schemas/InterleavedContent' - description: The content of the model's response - stop_reason: - type: string - enum: - - end_of_turn - - end_of_message - - out_of_tokens - description: >- - Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`: - The model finished generating the entire response. - `StopReason.end_of_message`: - The model finished generating but generated a partial response -- usually, - a tool call. The user may call the tool and continue the conversation - with the tool's response. - `StopReason.out_of_tokens`: The model ran - out of token budget. - tool_calls: - type: array - items: - $ref: '#/components/schemas/ToolCall' - description: >- - List of tool calls. Each tool call is a ToolCall object. - additionalProperties: false - required: - - role - - content - - stop_reason - title: CompletionMessage - description: >- - A message containing the model's (assistant) response in a chat conversation. - Message: - oneOf: - - $ref: '#/components/schemas/UserMessage' - - $ref: '#/components/schemas/SystemMessage' - - $ref: '#/components/schemas/ToolResponseMessage' - - $ref: '#/components/schemas/CompletionMessage' - discriminator: - propertyName: role - mapping: - user: '#/components/schemas/UserMessage' - system: '#/components/schemas/SystemMessage' - tool: '#/components/schemas/ToolResponseMessage' - assistant: '#/components/schemas/CompletionMessage' ToolCall: type: object properties: @@ -2699,6 +1979,45 @@ components: title: ToolResponseMessage description: >- A message representing the result of a tool invocation. + TopKSamplingStrategy: + type: object + properties: + type: + type: string + const: top_k + default: top_k + top_k: + type: integer + additionalProperties: false + required: + - type + - top_k + title: TopKSamplingStrategy + TopPSamplingStrategy: + type: object + properties: + type: + type: string + const: top_p + default: top_p + temperature: + type: number + top_p: + type: number + default: 0.95 + additionalProperties: false + required: + - type + title: TopPSamplingStrategy + URL: + type: object + properties: + uri: + type: string + additionalProperties: false + required: + - uri + title: URL UserMessage: type: object properties: @@ -2938,6 +2257,54 @@ components: required: - job_uuid title: CancelTrainingJobRequest + ToolConfig: + type: object + properties: + tool_choice: + oneOf: + - type: string + enum: + - auto + - required + - none + title: ToolChoice + description: >- + Whether tool use is required or automatic. This is a hint to the model + which may not be followed. It depends on the Instruction Following + capabilities of the model. + - type: string + default: auto + description: >- + (Optional) Whether tool use is automatic, required, or none. Can also + specify a tool name to use a specific tool. Defaults to ToolChoice.auto. + tool_prompt_format: + type: string + enum: + - json + - function_tag + - python_list + description: >- + (Optional) Instructs the model how to format tool calls. By default, Llama + Stack will attempt to use a format that is best adapted to the model. + - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. + - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a + tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python + syntax -- a list of function calls. + system_message_behavior: + type: string + enum: + - append + - replace + description: >- + (Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: + Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: + Replaces the default system prompt with the provided system message. The + system message can include the string '{{function_definitions}}' to indicate + where the function definitions should be inserted. + default: append + additionalProperties: false + title: ToolConfig + description: Configuration for tool use. ChatCompletionRequest: type: object properties: @@ -3201,6 +2568,142 @@ components: title: CompletionResponseStreamChunk description: >- A chunk of a streamed completion response. + AgentConfig: + type: object + properties: + sampling_params: + $ref: '#/components/schemas/SamplingParams' + input_shields: + type: array + items: + type: string + output_shields: + type: array + items: + type: string + toolgroups: + type: array + items: + $ref: '#/components/schemas/AgentTool' + client_tools: + type: array + items: + $ref: '#/components/schemas/ToolDef' + tool_choice: + type: string + enum: + - auto + - required + - none + title: ToolChoice + description: >- + Whether tool use is required or automatic. This is a hint to the model + which may not be followed. It depends on the Instruction Following capabilities + of the model. + deprecated: true + tool_prompt_format: + type: string + enum: + - json + - function_tag + - python_list + title: ToolPromptFormat + description: >- + Prompt format for calling custom / zero shot tools. + deprecated: true + tool_config: + $ref: '#/components/schemas/ToolConfig' + max_infer_iters: + type: integer + default: 10 + model: + type: string + instructions: + type: string + enable_session_persistence: + type: boolean + default: false + response_format: + $ref: '#/components/schemas/ResponseFormat' + additionalProperties: false + required: + - model + - instructions + title: AgentConfig + AgentTool: + oneOf: + - type: string + - type: object + properties: + name: + type: string + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - name + - args + title: AgentToolGroupWithArgs + ToolDef: + type: object + properties: + name: + type: string + description: + type: string + parameters: + type: array + items: + $ref: '#/components/schemas/ToolParameter' + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - name + title: ToolDef + ToolParameter: + type: object + properties: + name: + type: string + parameter_type: + type: string + description: + type: string + required: + type: boolean + default: true + default: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - name + - parameter_type + - description + - required + title: ToolParameter CreateAgentRequest: type: object properties: @@ -3789,6 +3292,141 @@ components: title: EmbeddingsResponse description: >- Response containing generated embeddings. + AgentCandidate: + type: object + properties: + type: + type: string + const: agent + default: agent + config: + $ref: '#/components/schemas/AgentConfig' + additionalProperties: false + required: + - type + - config + title: AgentCandidate + AggregationFunctionType: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + BasicScoringFnParams: + type: object + properties: + type: + type: string + const: basic + default: basic + aggregation_functions: + type: array + items: + $ref: '#/components/schemas/AggregationFunctionType' + additionalProperties: false + required: + - type + title: BasicScoringFnParams + BenchmarkConfig: + type: object + properties: + eval_candidate: + $ref: '#/components/schemas/EvalCandidate' + scoring_params: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringFnParams' + num_examples: + type: integer + additionalProperties: false + required: + - eval_candidate + - scoring_params + title: BenchmarkConfig + EvalCandidate: + oneOf: + - $ref: '#/components/schemas/ModelCandidate' + - $ref: '#/components/schemas/AgentCandidate' + discriminator: + propertyName: type + mapping: + model: '#/components/schemas/ModelCandidate' + agent: '#/components/schemas/AgentCandidate' + LLMAsJudgeScoringFnParams: + type: object + properties: + type: + type: string + const: llm_as_judge + default: llm_as_judge + judge_model: + type: string + prompt_template: + type: string + judge_score_regexes: + type: array + items: + type: string + aggregation_functions: + type: array + items: + $ref: '#/components/schemas/AggregationFunctionType' + additionalProperties: false + required: + - type + - judge_model + title: LLMAsJudgeScoringFnParams + ModelCandidate: + type: object + properties: + type: + type: string + const: model + default: model + model: + type: string + sampling_params: + $ref: '#/components/schemas/SamplingParams' + system_message: + $ref: '#/components/schemas/SystemMessage' + additionalProperties: false + required: + - type + - model + - sampling_params + title: ModelCandidate + RegexParserScoringFnParams: + type: object + properties: + type: + type: string + const: regex_parser + default: regex_parser + parsing_regexes: + type: array + items: + type: string + aggregation_functions: + type: array + items: + $ref: '#/components/schemas/AggregationFunctionType' + additionalProperties: false + required: + - type + title: RegexParserScoringFnParams + ScoringFnParams: + oneOf: + - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + - $ref: '#/components/schemas/RegexParserScoringFnParams' + - $ref: '#/components/schemas/BasicScoringFnParams' + discriminator: + propertyName: type + mapping: + llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + regex_parser: '#/components/schemas/RegexParserScoringFnParams' + basic: '#/components/schemas/BasicScoringFnParams' EvaluateRowsRequest: type: object properties: @@ -3816,6 +3454,60 @@ components: - scoring_functions - task_config title: EvaluateRowsRequest + EvaluateResponse: + type: object + properties: + generations: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + scores: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + additionalProperties: false + required: + - generations + - scores + title: EvaluateResponse + ScoringResult: + type: object + properties: + score_rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + aggregated_results: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - score_rows + - aggregated_results + title: ScoringResult Session: type: object properties: @@ -3859,6 +3551,45 @@ components: required: - step title: AgentStepResponse + Benchmark: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + const: benchmark + default: benchmark + dataset_id: + type: string + scoring_functions: + type: array + items: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - identifier + - provider_resource_id + - provider_id + - type + - dataset_id + - scoring_functions + - metadata + title: Benchmark AgentTurnInputType: type: object properties: @@ -4375,6 +4106,14 @@ components: - checkpoints title: PostTrainingJobArtifactsResponse description: Artifacts of a finetuning job. + JobStatus: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + title: JobStatus PostTrainingJobStatusResponse: type: object properties: @@ -4603,6 +4342,17 @@ components: title: ListBucketResponse description: >- Response representing a list of file entries. + ListBenchmarksResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Benchmark' + additionalProperties: false + required: + - data + title: ListBenchmarksResponse ListDatasetsResponse: type: object properties: @@ -5429,6 +5179,15 @@ components: required: - task_config title: RunEvalRequest + Job: + type: object + properties: + job_id: + type: string + additionalProperties: false + required: + - job_id + title: Job RunShieldRequest: type: object properties: diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb index 8eecf84ab..f3f41b18a 100644 --- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb +++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb @@ -1017,14 +1017,14 @@ " \"content\": SYSTEM_PROMPT_TEMPLATE.format(subject=subset),\n", "}\n", "\n", - "client.eval_tasks.register(\n", - " eval_task_id=\"meta-reference::mmmu\",\n", + "client.benchmarks.register(\n", + " benchmark_id=\"meta-reference::mmmu\",\n", " dataset_id=f\"mmmu-{subset}-{split}\",\n", " scoring_functions=[\"basic::regex_parser_multiple_choice_answer\"],\n", ")\n", "\n", - "response = client.eval.evaluate_rows(\n", - " task_id=\"meta-reference::mmmu\",\n", + "response = client.eval.evaluate_rows_alpha(\n", + " benchmark_id=\"meta-reference::mmmu\",\n", " input_rows=eval_rows,\n", " scoring_functions=[\"basic::regex_parser_multiple_choice_answer\"],\n", " task_config={\n", @@ -1196,14 +1196,14 @@ " provider_id=\"together\",\n", ")\n", "\n", - "client.eval_tasks.register(\n", - " eval_task_id=\"meta-reference::simpleqa\",\n", + "client.benchmarks.register(\n", + " benchmark_id=\"meta-reference::simpleqa\",\n", " dataset_id=simpleqa_dataset_id,\n", " scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n", ")\n", "\n", - "response = client.eval.evaluate_rows(\n", - " task_id=\"meta-reference::simpleqa\",\n", + "response = client.eval.evaluate_rows_alpha(\n", + " benchmark_id=\"meta-reference::simpleqa\",\n", " input_rows=eval_rows.rows,\n", " scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n", " task_config={\n", @@ -1351,8 +1351,8 @@ " \"enable_session_persistence\": False,\n", "}\n", "\n", - "response = client.eval.evaluate_rows(\n", - " task_id=\"meta-reference::simpleqa\",\n", + "response = client.eval.evaluate_rows_alpha(\n", + " benchmark_id=\"meta-reference::simpleqa\",\n", " input_rows=eval_rows.rows,\n", " scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n", " task_config={\n", diff --git a/llama_stack/apis/benchmarks/benchmarks.py b/llama_stack/apis/benchmarks/benchmarks.py index 91b1ca927..39ba355e9 100644 --- a/llama_stack/apis/benchmarks/benchmarks.py +++ b/llama_stack/apis/benchmarks/benchmarks.py @@ -64,23 +64,3 @@ class Benchmarks(Protocol): provider_id: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, ) -> None: ... - - @webmethod(route="/eval-tasks", method="GET") - async def DEPRECATED_list_eval_tasks(self) -> ListBenchmarksResponse: ... - - @webmethod(route="/eval-tasks/{eval_task_id}", method="GET") - async def DEPRECATED_get_eval_task( - self, - eval_task_id: str, - ) -> Optional[Benchmark]: ... - - @webmethod(route="/eval-tasks", method="POST") - async def DEPRECATED_register_eval_task( - self, - eval_task_id: str, - dataset_id: str, - scoring_functions: List[str], - provider_benchmark_id: Optional[str] = None, - provider_id: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, - ) -> None: ... diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py index e2ff4458e..a7b2e7670 100644 --- a/llama_stack/apis/eval/eval.py +++ b/llama_stack/apis/eval/eval.py @@ -39,7 +39,6 @@ EvalCandidate = register_schema( @json_schema_type class BenchmarkConfig(BaseModel): - type: Literal["benchmark"] = "benchmark" eval_candidate: EvalCandidate scoring_params: Dict[str, ScoringFnParams] = Field( description="Map between scoring function id and parameters for each scoring function you want to run", @@ -84,28 +83,3 @@ class Eval(Protocol): @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET") async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: ... - - @webmethod(route="/eval/tasks/{task_id}/jobs", method="POST") - async def DEPRECATED_run_eval( - self, - task_id: str, - task_config: BenchmarkConfig, - ) -> Job: ... - - @webmethod(route="/eval/tasks/{task_id}/evaluations", method="POST") - async def DEPRECATED_evaluate_rows( - self, - task_id: str, - input_rows: List[Dict[str, Any]], - scoring_functions: List[str], - task_config: BenchmarkConfig, - ) -> EvaluateResponse: ... - - @webmethod(route="/eval/tasks/{task_id}/jobs/{job_id}", method="GET") - async def DEPRECATED_job_status(self, task_id: str, job_id: str) -> Optional[JobStatus]: ... - - @webmethod(route="/eval/tasks/{task_id}/jobs/{job_id}", method="DELETE") - async def DEPRECATED_job_cancel(self, task_id: str, job_id: str) -> None: ... - - @webmethod(route="/eval/tasks/{task_id}/jobs/{job_id}/result", method="GET") - async def DEPRECATED_job_result(self, task_id: str, job_id: str) -> EvaluateResponse: ... diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index 9d12c8a40..016ca4984 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -411,48 +411,6 @@ class EvalRouter(Eval): job_id, ) - async def DEPRECATED_run_eval( - self, - task_id: str, - task_config: BenchmarkConfig, - ) -> Job: - return await self.run_eval(benchmark_id=task_id, task_config=task_config) - - async def DEPRECATED_evaluate_rows( - self, - task_id: str, - input_rows: List[Dict[str, Any]], - scoring_functions: List[str], - task_config: BenchmarkConfig, - ) -> EvaluateResponse: - return await self.evaluate_rows( - benchmark_id=task_id, - input_rows=input_rows, - scoring_functions=scoring_functions, - task_config=task_config, - ) - - async def DEPRECATED_job_status( - self, - task_id: str, - job_id: str, - ) -> Optional[JobStatus]: - return await self.job_status(benchmark_id=task_id, job_id=job_id) - - async def DEPRECATED_job_cancel( - self, - task_id: str, - job_id: str, - ) -> None: - return await self.job_cancel(benchmark_id=task_id, job_id=job_id) - - async def DEPRECATED_job_result( - self, - task_id: str, - job_id: str, - ) -> EvaluateResponse: - return await self.job_result(benchmark_id=task_id, job_id=job_id) - class ToolRuntimeRouter(ToolRuntime): class RagToolImpl(RAGToolRuntime): diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index 2cddc3970..c2434e517 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -468,35 +468,6 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks): ) await self.register_object(benchmark) - async def DEPRECATED_list_eval_tasks(self) -> ListBenchmarksResponse: - logger.warning("DEPRECATED: Use /eval/benchmarks instead") - return await self.list_benchmarks() - - async def DEPRECATED_get_eval_task( - self, - eval_task_id: str, - ) -> Optional[Benchmark]: - logger.warning("DEPRECATED: Use /eval/benchmarks instead") - return await self.get_benchmark(eval_task_id) - - async def DEPRECATED_register_eval_task( - self, - eval_task_id: str, - dataset_id: str, - scoring_functions: List[str], - provider_benchmark_id: Optional[str] = None, - provider_id: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, - ) -> None: - logger.warning("DEPRECATED: Use /eval/benchmarks instead") - return await self.register_benchmark( - benchmark_id=eval_task_id, - dataset_id=dataset_id, - scoring_functions=scoring_functions, - metadata=metadata, - provider_benchmark_id=provider_benchmark_id, - ) - class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): async def list_tools(self, toolgroup_id: Optional[str] = None) -> ListToolsResponse: diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index 0f77b7347..18d408a31 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -234,45 +234,3 @@ class MetaReferenceEvalImpl( raise ValueError(f"Job is not completed, Status: {status.value}") return self.jobs[job_id] - - async def DEPRECATED_run_eval( - self, - task_id: str, - task_config: BenchmarkConfig, - ) -> Job: - return await self.run_eval(benchmark_id=task_id, task_config=task_config) - - async def DEPRECATED_evaluate_rows( - self, - task_id: str, - input_rows: List[Dict[str, Any]], - scoring_functions: List[str], - task_config: BenchmarkConfig, - ) -> EvaluateResponse: - return await self.evaluate_rows( - benchmark_id=task_id, - input_rows=input_rows, - scoring_functions=scoring_functions, - task_config=task_config, - ) - - async def DEPRECATED_job_status( - self, - task_id: str, - job_id: str, - ) -> Optional[JobStatus]: - return await self.job_status(benchmark_id=task_id, job_id=job_id) - - async def DEPRECATED_job_cancel( - self, - task_id: str, - job_id: str, - ) -> None: - return await self.job_cancel(benchmark_id=task_id, job_id=job_id) - - async def DEPRECATED_job_result( - self, - task_id: str, - job_id: str, - ) -> EvaluateResponse: - return await self.job_result(benchmark_id=task_id, job_id=job_id)