refactor(agent): drop AgentToolGroup for responses tools

Ashwin Bharambe 2025-10-10 13:43:43 -07:00
parent c56b2deb7d
commit ce44b9d6f6
12 changed files with 4051 additions and 4225 deletions

File diffs suppressed for four files because they are too large.


@ -15576,8 +15576,20 @@
"AgentConfig": {
"type": "object",
"properties": {
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams"
"max_output_tokens": {
"type": "integer"
},
"temperature": {
"type": "number"
},
"top_p": {
"type": "number"
},
"stop": {
"type": "array",
"items": {
"type": "string"
}
},
"input_shields": {
"type": "array",
@ -15591,40 +15603,25 @@
"type": "string"
}
},
"toolgroups": {
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AgentTool"
"$ref": "#/components/schemas/OpenAIResponseInputTool"
}
},
"client_tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolDef"
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseInputTool"
},
{
"$ref": "#/components/schemas/ToolDef"
}
]
}
},
"tool_choice": {
"type": "string",
"enum": [
"auto",
"required",
"none"
],
"title": "ToolChoice",
"description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.",
"deprecated": true
},
"tool_prompt_format": {
"type": "string",
"enum": [
"json",
"function_tag",
"python_list"
],
"title": "ToolPromptFormat",
"description": "Prompt format for calling custom / zero shot tools.",
"deprecated": true
},
"tool_config": {
"$ref": "#/components/schemas/ToolConfig"
},
@ -15650,7 +15647,7 @@
"description": "Optional flag indicating whether session data has to be persisted"
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat",
"$ref": "#/components/schemas/OpenAIResponseFormatParam",
"description": "Optional response format configuration"
}
},
@ -15662,232 +15659,6 @@
"title": "AgentConfig",
"description": "Configuration for an agent."
},
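For orientation, here is a minimal sketch of an agent configuration under the new flattened schema: sampling knobs inline instead of a nested SamplingParams, and OpenAI-style response tools instead of toolgroups. The model id, stop token, and tool values are illustrative, not taken from the diff:

agent_config = {
    "model": "meta-llama/Llama-3.3-70B-Instruct",  # hypothetical model id
    "instructions": "You are a helpful assistant.",
    "max_output_tokens": 512,
    "temperature": 0.7,
    "top_p": 0.9,
    "stop": ["<|done|>"],
    "tools": [
        {"type": "web_search"},  # an OpenAIResponseInputTool
    ],
}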
"AgentTool": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"properties": {
"name": {
"type": "string"
},
"args": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"name",
"args"
],
"title": "AgentToolGroupWithArgs"
}
]
},
"GrammarResponseFormat": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"json_schema",
"grammar"
],
"description": "Must be \"grammar\" to identify this format type",
"const": "grammar",
"default": "grammar"
},
"bnf": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "The BNF grammar specification the response should conform to"
}
},
"additionalProperties": false,
"required": [
"type",
"bnf"
],
"title": "GrammarResponseFormat",
"description": "Configuration for grammar-guided response generation."
},
"GreedySamplingStrategy": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "greedy",
"default": "greedy",
"description": "Must be \"greedy\" to identify this sampling strategy"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "GreedySamplingStrategy",
"description": "Greedy sampling strategy that selects the highest probability token at each step."
},
"JsonSchemaResponseFormat": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"json_schema",
"grammar"
],
"description": "Must be \"json_schema\" to identify this format type",
"const": "json_schema",
"default": "json_schema"
},
"json_schema": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model."
}
},
"additionalProperties": false,
"required": [
"type",
"json_schema"
],
"title": "JsonSchemaResponseFormat",
"description": "Configuration for JSON schema-guided response generation."
},
"ResponseFormat": {
"oneOf": [
{
"$ref": "#/components/schemas/JsonSchemaResponseFormat"
},
{
"$ref": "#/components/schemas/GrammarResponseFormat"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"json_schema": "#/components/schemas/JsonSchemaResponseFormat",
"grammar": "#/components/schemas/GrammarResponseFormat"
}
}
},
"SamplingParams": {
"type": "object",
"properties": {
"strategy": {
"oneOf": [
{
"$ref": "#/components/schemas/GreedySamplingStrategy"
},
{
"$ref": "#/components/schemas/TopPSamplingStrategy"
},
{
"$ref": "#/components/schemas/TopKSamplingStrategy"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"greedy": "#/components/schemas/GreedySamplingStrategy",
"top_p": "#/components/schemas/TopPSamplingStrategy",
"top_k": "#/components/schemas/TopKSamplingStrategy"
}
},
"description": "The sampling strategy."
},
"max_tokens": {
"type": "integer",
"default": 0,
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
},
"repetition_penalty": {
"type": "number",
"default": 1.0,
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics."
},
"stop": {
"type": "array",
"items": {
"type": "string"
},
"description": "Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."
}
},
"additionalProperties": false,
"required": [
"strategy"
],
"title": "SamplingParams",
"description": "Sampling parameters."
},
"ToolConfig": {
"type": "object",
"properties": {
@ -15933,54 +15704,6 @@
"title": "ToolConfig",
"description": "Configuration for tool use."
},
"TopKSamplingStrategy": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "top_k",
"default": "top_k",
"description": "Must be \"top_k\" to identify this sampling strategy"
},
"top_k": {
"type": "integer",
"description": "Number of top tokens to consider for sampling. Must be at least 1"
}
},
"additionalProperties": false,
"required": [
"type",
"top_k"
],
"title": "TopKSamplingStrategy",
"description": "Top-k sampling strategy that restricts sampling to the k most likely tokens."
},
"TopPSamplingStrategy": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "top_p",
"default": "top_p",
"description": "Must be \"top_p\" to identify this sampling strategy"
},
"temperature": {
"type": "number",
"description": "Controls randomness in sampling. Higher values increase randomness"
},
"top_p": {
"type": "number",
"default": 0.95,
"description": "Cumulative probability threshold for nucleus sampling. Defaults to 0.95"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "TopPSamplingStrategy",
"description": "Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p."
},
"CreateAgentRequest": {
"type": "object",
"properties": {
@ -16100,8 +15823,11 @@
"default": "inference"
},
"model_response": {
"$ref": "#/components/schemas/CompletionMessage",
"$ref": "#/components/schemas/OpenAIAssistantMessageParam",
"description": "The response from the LLM."
},
"finish_reason": {
"type": "string"
}
},
"additionalProperties": false,
@ -16153,7 +15879,17 @@
"description": "The IDs of the vector databases to retrieve context from."
},
"inserted_context": {
"$ref": "#/components/schemas/InterleavedContent",
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
}
}
],
"description": "The context retrieved from the vector databases."
}
},
@ -16287,14 +16023,14 @@
"tool_calls": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolCall"
"$ref": "#/components/schemas/OpenAIChatCompletionToolCall"
},
"description": "The tool calls to execute."
},
"tool_responses": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolResponse"
"$ref": "#/components/schemas/OpenAIToolMessageParam"
},
"description": "The tool responses from the tool calls."
}
@ -16310,71 +16046,6 @@
"title": "ToolExecutionStep",
"description": "A tool execution step in an agent turn."
},
"ToolResponse": {
"type": "object",
"properties": {
"call_id": {
"type": "string",
"description": "Unique identifier for the tool call this response is for"
},
"tool_name": {
"oneOf": [
{
"type": "string",
"enum": [
"brave_search",
"wolfram_alpha",
"photogen",
"code_interpreter"
],
"title": "BuiltinTool"
},
{
"type": "string"
}
],
"description": "Name of the tool that was invoked"
},
"content": {
"$ref": "#/components/schemas/InterleavedContent",
"description": "The response content from the tool"
},
"metadata": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Additional metadata about the tool response"
}
},
"additionalProperties": false,
"required": [
"call_id",
"tool_name",
"content"
],
"title": "ToolResponse",
"description": "Response from a tool invocation."
},
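With ToolResponse removed, tool results travel as OpenAIToolMessageParam, the OpenAI chat-completions tool message. A minimal instance might look like this (call id and content are illustrative; exact optional fields depend on the schema):

tool_message = {
    "role": "tool",
    "tool_call_id": "call_abc123",  # id of the tool call being answered (illustrative)
    "content": "212 degrees Fahrenheit at sea level.",
}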
"Turn": {
"type": "object",
"properties": {
@ -16389,14 +16060,7 @@
"input_messages": {
"type": "array",
"items": {
"oneOf": [
{
"$ref": "#/components/schemas/UserMessage"
},
{
"$ref": "#/components/schemas/ToolResponseMessage"
}
]
"$ref": "#/components/schemas/OpenAIMessageParam"
},
"description": "List of messages that initiated this turn"
},
@ -16430,9 +16094,12 @@
"description": "Ordered list of processing steps executed during this turn"
},
"output_message": {
"$ref": "#/components/schemas/CompletionMessage",
"$ref": "#/components/schemas/OpenAIAssistantMessageParam",
"description": "The model's generated response containing content and metadata"
},
"finish_reason": {
"type": "string"
},
"output_attachments": {
"type": "array",
"items": {
@ -16443,13 +16110,10 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/InterleavedContentItem"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/InterleavedContentItem"
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
}
},
{
@ -16502,14 +16166,7 @@
"messages": {
"type": "array",
"items": {
"oneOf": [
{
"$ref": "#/components/schemas/UserMessage"
},
{
"$ref": "#/components/schemas/ToolResponseMessage"
}
]
"$ref": "#/components/schemas/OpenAIMessageParam"
},
"description": "List of messages to start the turn with."
},
@ -16527,13 +16184,10 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/InterleavedContentItem"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/InterleavedContentItem"
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
}
},
{
@ -16557,12 +16211,12 @@
},
"description": "(Optional) List of documents to create the turn with."
},
"toolgroups": {
"tools": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AgentTool"
"$ref": "#/components/schemas/OpenAIResponseInputTool"
},
"description": "(Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request."
"description": "(Optional) List of tools to create the turn with, will be used in addition to the agent's config tools for the request."
},
"tool_config": {
"$ref": "#/components/schemas/ToolConfig",
@ -17015,7 +16669,7 @@
"tool_responses": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolResponse"
"$ref": "#/components/schemas/OpenAIToolMessageParam"
},
"description": "The tool call responses to resume the turn with."
},
@ -17285,6 +16939,23 @@
"title": "BenchmarkConfig",
"description": "A benchmark configuration for evaluation."
},
"GreedySamplingStrategy": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "greedy",
"default": "greedy",
"description": "Must be \"greedy\" to identify this sampling strategy"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "GreedySamplingStrategy",
"description": "Greedy sampling strategy that selects the highest probability token at each step."
},
"ModelCandidate": {
"type": "object",
"properties": {
@ -17315,6 +16986,104 @@
"title": "ModelCandidate",
"description": "A model candidate for evaluation."
},
"SamplingParams": {
"type": "object",
"properties": {
"strategy": {
"oneOf": [
{
"$ref": "#/components/schemas/GreedySamplingStrategy"
},
{
"$ref": "#/components/schemas/TopPSamplingStrategy"
},
{
"$ref": "#/components/schemas/TopKSamplingStrategy"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"greedy": "#/components/schemas/GreedySamplingStrategy",
"top_p": "#/components/schemas/TopPSamplingStrategy",
"top_k": "#/components/schemas/TopKSamplingStrategy"
}
},
"description": "The sampling strategy."
},
"max_tokens": {
"type": "integer",
"default": 0,
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
},
"repetition_penalty": {
"type": "number",
"default": 1.0,
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics."
},
"stop": {
"type": "array",
"items": {
"type": "string"
},
"description": "Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."
}
},
"additionalProperties": false,
"required": [
"strategy"
],
"title": "SamplingParams",
"description": "Sampling parameters."
},
"TopKSamplingStrategy": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "top_k",
"default": "top_k",
"description": "Must be \"top_k\" to identify this sampling strategy"
},
"top_k": {
"type": "integer",
"description": "Number of top tokens to consider for sampling. Must be at least 1"
}
},
"additionalProperties": false,
"required": [
"type",
"top_k"
],
"title": "TopKSamplingStrategy",
"description": "Top-k sampling strategy that restricts sampling to the k most likely tokens."
},
"TopPSamplingStrategy": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "top_p",
"default": "top_p",
"description": "Must be \"top_p\" to identify this sampling strategy"
},
"temperature": {
"type": "number",
"description": "Controls randomness in sampling. Higher values increase randomness"
},
"top_p": {
"type": "number",
"default": 0.95,
"description": "Cumulative probability threshold for nucleus sampling. Defaults to 0.95"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "TopPSamplingStrategy",
"description": "Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p."
},
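Note that GreedySamplingStrategy, SamplingParams, TopKSamplingStrategy, and TopPSamplingStrategy are re-added here rather than new: they move out of the agent schemas but remain in use by ModelCandidate for evaluation. A payload validating against SamplingParams with a nucleus-sampling strategy might look like this (values are illustrative):

sampling_params = {
    "strategy": {"type": "top_p", "temperature": 0.8, "top_p": 0.95},
    "max_tokens": 256,
    "repetition_penalty": 1.0,
    "stop": ["</answer>"],
}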
"EvaluateRowsRequest": {
"type": "object",
"properties": {


@ -11663,8 +11663,16 @@ components:
AgentConfig:
type: object
properties:
sampling_params:
$ref: '#/components/schemas/SamplingParams'
max_output_tokens:
type: integer
temperature:
type: number
top_p:
type: number
stop:
type: array
items:
type: string
input_shields:
type: array
items:
@ -11673,36 +11681,16 @@ components:
type: array
items:
type: string
toolgroups:
tools:
type: array
items:
$ref: '#/components/schemas/AgentTool'
$ref: '#/components/schemas/OpenAIResponseInputTool'
client_tools:
type: array
items:
$ref: '#/components/schemas/ToolDef'
tool_choice:
type: string
enum:
- auto
- required
- none
title: ToolChoice
description: >-
Whether tool use is required or automatic. This is a hint to the model
which may not be followed. It depends on the Instruction Following capabilities
of the model.
deprecated: true
tool_prompt_format:
type: string
enum:
- json
- function_tag
- python_list
title: ToolPromptFormat
description: >-
Prompt format for calling custom / zero shot tools.
deprecated: true
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputTool'
- $ref: '#/components/schemas/ToolDef'
tool_config:
$ref: '#/components/schemas/ToolConfig'
max_infer_iters:
@ -11725,7 +11713,7 @@ components:
description: >-
Optional flag indicating whether session data has to be persisted
response_format:
$ref: '#/components/schemas/ResponseFormat'
$ref: '#/components/schemas/OpenAIResponseFormatParam'
description: Optional response format configuration
additionalProperties: false
required:
@ -11733,157 +11721,6 @@ components:
- instructions
title: AgentConfig
description: Configuration for an agent.
AgentTool:
oneOf:
- type: string
- type: object
properties:
name:
type: string
args:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- name
- args
title: AgentToolGroupWithArgs
GrammarResponseFormat:
type: object
properties:
type:
type: string
enum:
- json_schema
- grammar
description: >-
Must be "grammar" to identify this format type
const: grammar
default: grammar
bnf:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
The BNF grammar specification the response should conform to
additionalProperties: false
required:
- type
- bnf
title: GrammarResponseFormat
description: >-
Configuration for grammar-guided response generation.
GreedySamplingStrategy:
type: object
properties:
type:
type: string
const: greedy
default: greedy
description: >-
Must be "greedy" to identify this sampling strategy
additionalProperties: false
required:
- type
title: GreedySamplingStrategy
description: >-
Greedy sampling strategy that selects the highest probability token at each
step.
JsonSchemaResponseFormat:
type: object
properties:
type:
type: string
enum:
- json_schema
- grammar
description: >-
Must be "json_schema" to identify this format type
const: json_schema
default: json_schema
json_schema:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
The JSON schema the response should conform to. In a Python SDK, this
is often a `pydantic` model.
additionalProperties: false
required:
- type
- json_schema
title: JsonSchemaResponseFormat
description: >-
Configuration for JSON schema-guided response generation.
ResponseFormat:
oneOf:
- $ref: '#/components/schemas/JsonSchemaResponseFormat'
- $ref: '#/components/schemas/GrammarResponseFormat'
discriminator:
propertyName: type
mapping:
json_schema: '#/components/schemas/JsonSchemaResponseFormat'
grammar: '#/components/schemas/GrammarResponseFormat'
SamplingParams:
type: object
properties:
strategy:
oneOf:
- $ref: '#/components/schemas/GreedySamplingStrategy'
- $ref: '#/components/schemas/TopPSamplingStrategy'
- $ref: '#/components/schemas/TopKSamplingStrategy'
discriminator:
propertyName: type
mapping:
greedy: '#/components/schemas/GreedySamplingStrategy'
top_p: '#/components/schemas/TopPSamplingStrategy'
top_k: '#/components/schemas/TopKSamplingStrategy'
description: The sampling strategy.
max_tokens:
type: integer
default: 0
description: >-
The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's
context length.
repetition_penalty:
type: number
default: 1.0
description: >-
Number between -2.0 and 2.0. Positive values penalize new tokens based
on whether they appear in the text so far, increasing the model's likelihood
to talk about new topics.
stop:
type: array
items:
type: string
description: >-
Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
additionalProperties: false
required:
- strategy
title: SamplingParams
description: Sampling parameters.
ToolConfig:
type: object
properties:
@ -11932,51 +11769,6 @@ components:
additionalProperties: false
title: ToolConfig
description: Configuration for tool use.
TopKSamplingStrategy:
type: object
properties:
type:
type: string
const: top_k
default: top_k
description: >-
Must be "top_k" to identify this sampling strategy
top_k:
type: integer
description: >-
Number of top tokens to consider for sampling. Must be at least 1
additionalProperties: false
required:
- type
- top_k
title: TopKSamplingStrategy
description: >-
Top-k sampling strategy that restricts sampling to the k most likely tokens.
TopPSamplingStrategy:
type: object
properties:
type:
type: string
const: top_p
default: top_p
description: >-
Must be "top_p" to identify this sampling strategy
temperature:
type: number
description: >-
Controls randomness in sampling. Higher values increase randomness
top_p:
type: number
default: 0.95
description: >-
Cumulative probability threshold for nucleus sampling. Defaults to 0.95
additionalProperties: false
required:
- type
title: TopPSamplingStrategy
description: >-
Top-p (nucleus) sampling strategy that samples from the smallest set of tokens
with cumulative probability >= p.
CreateAgentRequest:
type: object
properties:
@ -12072,8 +11864,10 @@ components:
const: inference
default: inference
model_response:
$ref: '#/components/schemas/CompletionMessage'
$ref: '#/components/schemas/OpenAIAssistantMessageParam'
description: The response from the LLM.
finish_reason:
type: string
additionalProperties: false
required:
- turn_id
@ -12115,7 +11909,11 @@ components:
description: >-
The IDs of the vector databases to retrieve context from.
inserted_context:
$ref: '#/components/schemas/InterleavedContent'
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: >-
The context retrieved from the vector databases.
additionalProperties: false
@ -12226,12 +12024,12 @@ components:
tool_calls:
type: array
items:
$ref: '#/components/schemas/ToolCall'
$ref: '#/components/schemas/OpenAIChatCompletionToolCall'
description: The tool calls to execute.
tool_responses:
type: array
items:
$ref: '#/components/schemas/ToolResponse'
$ref: '#/components/schemas/OpenAIToolMessageParam'
description: The tool responses from the tool calls.
additionalProperties: false
required:
@ -12242,46 +12040,6 @@ components:
- tool_responses
title: ToolExecutionStep
description: A tool execution step in an agent turn.
ToolResponse:
type: object
properties:
call_id:
type: string
description: >-
Unique identifier for the tool call this response is for
tool_name:
oneOf:
- type: string
enum:
- brave_search
- wolfram_alpha
- photogen
- code_interpreter
title: BuiltinTool
- type: string
description: Name of the tool that was invoked
content:
$ref: '#/components/schemas/InterleavedContent'
description: The response content from the tool
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Additional metadata about the tool response
additionalProperties: false
required:
- call_id
- tool_name
- content
title: ToolResponse
description: Response from a tool invocation.
Turn:
type: object
properties:
@ -12296,9 +12054,7 @@ components:
input_messages:
type: array
items:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
$ref: '#/components/schemas/OpenAIMessageParam'
description: >-
List of messages that initiated this turn
steps:
@ -12319,9 +12075,11 @@ components:
description: >-
Ordered list of processing steps executed during this turn
output_message:
$ref: '#/components/schemas/CompletionMessage'
$ref: '#/components/schemas/OpenAIAssistantMessageParam'
description: >-
The model's generated response containing content and metadata
finish_reason:
type: string
output_attachments:
type: array
items:
@ -12330,10 +12088,9 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/InterleavedContentItem'
- type: array
items:
$ref: '#/components/schemas/InterleavedContentItem'
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
- $ref: '#/components/schemas/URL'
description: The content of the attachment.
mime_type:
@ -12373,9 +12130,7 @@ components:
messages:
type: array
items:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
$ref: '#/components/schemas/OpenAIMessageParam'
description: List of messages to start the turn with.
stream:
type: boolean
@ -12390,10 +12145,9 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/InterleavedContentItem'
- type: array
items:
$ref: '#/components/schemas/InterleavedContentItem'
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
- $ref: '#/components/schemas/URL'
description: The content of the document.
mime_type:
@ -12407,13 +12161,13 @@ components:
description: A document to be used by an agent.
description: >-
(Optional) List of documents to create the turn with.
toolgroups:
tools:
type: array
items:
$ref: '#/components/schemas/AgentTool'
$ref: '#/components/schemas/OpenAIResponseInputTool'
description: >-
(Optional) List of toolgroups to create the turn with, will be used in
addition to the agent's config toolgroups for the request.
(Optional) List of tools to create the turn with, will be used in addition
to the agent's config tools for the request.
tool_config:
$ref: '#/components/schemas/ToolConfig'
description: >-
@ -12764,7 +12518,7 @@ components:
tool_responses:
type: array
items:
$ref: '#/components/schemas/ToolResponse'
$ref: '#/components/schemas/OpenAIToolMessageParam'
description: >-
The tool call responses to resume the turn with.
stream:
@ -12955,6 +12709,22 @@ components:
title: BenchmarkConfig
description: >-
A benchmark configuration for evaluation.
GreedySamplingStrategy:
type: object
properties:
type:
type: string
const: greedy
default: greedy
description: >-
Must be "greedy" to identify this sampling strategy
additionalProperties: false
required:
- type
title: GreedySamplingStrategy
description: >-
Greedy sampling strategy that selects the highest probability token at each
step.
ModelCandidate:
type: object
properties:
@ -12980,6 +12750,92 @@ components:
- sampling_params
title: ModelCandidate
description: A model candidate for evaluation.
SamplingParams:
type: object
properties:
strategy:
oneOf:
- $ref: '#/components/schemas/GreedySamplingStrategy'
- $ref: '#/components/schemas/TopPSamplingStrategy'
- $ref: '#/components/schemas/TopKSamplingStrategy'
discriminator:
propertyName: type
mapping:
greedy: '#/components/schemas/GreedySamplingStrategy'
top_p: '#/components/schemas/TopPSamplingStrategy'
top_k: '#/components/schemas/TopKSamplingStrategy'
description: The sampling strategy.
max_tokens:
type: integer
default: 0
description: >-
The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's
context length.
repetition_penalty:
type: number
default: 1.0
description: >-
Number between -2.0 and 2.0. Positive values penalize new tokens based
on whether they appear in the text so far, increasing the model's likelihood
to talk about new topics.
stop:
type: array
items:
type: string
description: >-
Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
additionalProperties: false
required:
- strategy
title: SamplingParams
description: Sampling parameters.
TopKSamplingStrategy:
type: object
properties:
type:
type: string
const: top_k
default: top_k
description: >-
Must be "top_k" to identify this sampling strategy
top_k:
type: integer
description: >-
Number of top tokens to consider for sampling. Must be at least 1
additionalProperties: false
required:
- type
- top_k
title: TopKSamplingStrategy
description: >-
Top-k sampling strategy that restricts sampling to the k most likely tokens.
TopPSamplingStrategy:
type: object
properties:
type:
type: string
const: top_p
default: top_p
description: >-
Must be "top_p" to identify this sampling strategy
temperature:
type: number
description: >-
Controls randomness in sampling. Higher values increase randomness
top_p:
type: number
default: 0.95
description: >-
Cumulative probability threshold for nucleus sampling. Defaults to 0.95
additionalProperties: false
required:
- type
title: TopPSamplingStrategy
description: >-
Top-p (nucleus) sampling strategy that samples from the smallest set of tokens
with cumulative probability >= p.
EvaluateRowsRequest:
type: object
properties:


@ -20,9 +20,7 @@ from llama_stack.apis.inference import (
OpenAIMessageParam,
OpenAIResponseFormatParam,
OpenAIToolMessageParam,
ToolChoice,
ToolConfig,
ToolPromptFormat,
)
from llama_stack.apis.safety import SafetyViolation
from llama_stack.apis.tools import ToolDef
@ -107,7 +105,6 @@ class StepType(StrEnum):
memory_retrieval = "memory_retrieval"
@json_schema_type
@json_schema_type
class InferenceStep(StepCommon):
"""An inference step in an agent turn.
@ -208,15 +205,6 @@ class Session(BaseModel):
started_at: datetime
class AgentToolGroupWithArgs(BaseModel):
name: str
args: dict[str, Any]
AgentToolGroup = str | AgentToolGroupWithArgs
register_schema(AgentToolGroup, name="AgentTool")
class AgentConfigCommon(BaseModel):
max_output_tokens: int | None = None
temperature: float | None = None
@ -225,7 +213,7 @@ class AgentConfigCommon(BaseModel):
input_shields: list[str] | None = Field(default_factory=list)
output_shields: list[str] | None = Field(default_factory=list)
toolgroups: list[AgentToolGroup] | None = Field(default_factory=list)
tools: list[OpenAIResponseInputTool] | None = Field(default_factory=list)
client_tools: list[OpenAIResponseInputTool | ToolDef] | None = Field(default_factory=list)
tool_config: ToolConfig | None = Field(default=None)
@ -419,7 +407,7 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
:param session_id: Unique identifier for the conversation session
:param messages: List of messages to start the turn with
:param documents: (Optional) List of documents to provide to the agent
:param toolgroups: (Optional) List of tool groups to make available for this turn
:param tools: (Optional) List of tools to make available for this turn
:param stream: (Optional) Whether to stream the response
:param tool_config: (Optional) Tool configuration to override agent defaults
"""
@ -430,7 +418,7 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
messages: list[OpenAIMessageParam]
documents: list[Document] | None = None
toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: [])
tools: list[OpenAIResponseInputTool] | None = Field(default_factory=lambda: [])
stream: bool | None = False
tool_config: ToolConfig | None = None
@ -524,7 +512,7 @@ class Agents(Protocol):
messages: list[OpenAIMessageParam],
stream: bool | None = False,
documents: list[Document] | None = None,
toolgroups: list[AgentToolGroup] | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
tool_config: ToolConfig | None = None,
) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]:
"""Create a new turn for an agent.
@ -534,7 +522,7 @@ class Agents(Protocol):
:param messages: List of messages to start the turn with.
:param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
:param documents: (Optional) List of documents to create the turn with.
:param toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request.
:param tools: (Optional) List of tools to create the turn with, will be used in addition to the agent's config tools for the request.
:param tool_config: (Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config.
:returns: If stream=False, returns a Turn object.
If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk.
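Under the new signature, a caller passes OpenAI-style input tools directly instead of toolgroup names. A hedged sketch, with made-up agent and session ids and assuming pydantic coercion of plain dicts into OpenAIMessageParam / OpenAIResponseInputTool:

turn = await agents.create_agent_turn(
    agent_id="agent-123",      # illustrative id
    session_id="session-456",  # illustrative id
    messages=[{"role": "user", "content": "What changed in the agents API?"}],
    tools=[{"type": "web_search"}],  # replaces toolgroups=["builtin::websearch"]
    stream=False,
)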


@ -100,8 +100,6 @@ def convert_to_pydantic(annotation: Any, value: Any) -> Any:
return TypeAdapter(annotation).validate_python(value)
except Exception as e:
# TODO: this is a workaround for having Union[str, AgentToolGroup] in API schema.
# We should get rid of any non-discriminated unions in the API schema.
if origin is Union:
for union_type in get_args(annotation):
try:

View file

@ -10,16 +10,13 @@ import re
import uuid
import warnings
from collections.abc import AsyncGenerator
from typing import Any
from datetime import UTC, datetime
from typing import Any
import httpx
from llama_stack.apis.agents import (
AgentConfig,
OpenAIResponseInputTool,
AgentToolGroup,
AgentToolGroupWithArgs,
AgentTurnCreateRequest,
AgentTurnResponseEvent,
AgentTurnResponseEventType,
@ -33,12 +30,19 @@ from llama_stack.apis.agents import (
Attachment,
Document,
InferenceStep,
OpenAIResponseInputTool,
ShieldCallStep,
Step,
StepType,
ToolExecutionStep,
Turn,
)
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputToolFileSearch,
OpenAIResponseInputToolFunction,
OpenAIResponseInputToolMCP,
OpenAIResponseInputToolWebSearch,
)
from llama_stack.apis.common.content_types import URL, ToolCallDelta, ToolCallParseStatus
from llama_stack.apis.common.errors import SessionNotFoundError
from llama_stack.apis.inference import (
@ -47,13 +51,12 @@ from llama_stack.apis.inference import (
Inference,
Message,
OpenAIAssistantMessageParam,
OpenAIDeveloperMessageParam,
OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartTextParam,
OpenAIChatCompletionMessageContent,
OpenAIChatCompletionToolCall,
OpenAIChatCompletionToolCallFunction,
OpenAIImageURL,
OpenAIDeveloperMessageParam,
OpenAIMessageParam,
OpenAISystemMessageParam,
OpenAIToolMessageParam,
@ -123,7 +126,9 @@ def _openai_tool_call_to_legacy(tool_call: OpenAIChatCompletionToolCall) -> Tool
def _legacy_tool_call_to_openai(tool_call: ToolCall, index: int | None = None) -> OpenAIChatCompletionToolCall:
function_name = tool_call.tool_name if not isinstance(tool_call.tool_name, BuiltinTool) else tool_call.tool_name.value
function_name = (
tool_call.tool_name if not isinstance(tool_call.tool_name, BuiltinTool) else tool_call.tool_name.value
)
return OpenAIChatCompletionToolCall(
index=index,
id=tool_call.call_id,
@ -178,9 +183,9 @@ def _coerce_to_text(content: Any) -> str:
if isinstance(content, list):
return "\n".join(_coerce_to_text(item) for item in content)
if hasattr(content, "text"):
return getattr(content, "text")
return content.text
if hasattr(content, "image"):
image = getattr(content, "image")
image = content.image
if hasattr(image, "url") and image.url:
return getattr(image.url, "uri", "")
return str(content)
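As a quick illustration of the coercion above (stand-in objects, not the real pydantic models): a list is flattened item by item and joined with newlines, text parts contribute their .text, and image parts contribute the image URL's uri:

from types import SimpleNamespace

text_part = SimpleNamespace(text="hello")
image_part = SimpleNamespace(
    image=SimpleNamespace(url=SimpleNamespace(uri="https://example.com/cat.png"))
)
# _coerce_to_text([text_part, image_part]) == "hello\nhttps://example.com/cat.png"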
@ -200,10 +205,7 @@ def _openai_message_param_to_legacy(message: OpenAIMessageParam) -> Message:
# Map developer messages to user role for legacy compatibility
return UserMessage(content=_openai_message_content_to_text(message.content))
if isinstance(message, OpenAIAssistantMessageParam):
tool_calls = [
_openai_tool_call_to_legacy(tool_call)
for tool_call in message.tool_calls or []
]
tool_calls = [_openai_tool_call_to_legacy(tool_call) for tool_call in message.tool_calls or []]
return CompletionMessage(
content=_openai_message_content_to_text(message.content) if message.content is not None else "",
stop_reason=StopReason.end_of_turn,
@ -279,6 +281,10 @@ class ChatAgent(ShieldRunnerMixin):
self.created_at = created_at
self.telemetry_enabled = telemetry_enabled
self.tool_defs: list[ToolDefinition] = []
self.tool_name_to_args: dict[str | BuiltinTool, dict[str, Any]] = {}
self.client_tools_config: list[OpenAIResponseInputTool | ToolDef] = []
ShieldRunnerMixin.__init__(
self,
safety_api,
@ -367,7 +373,7 @@ class ChatAgent(ShieldRunnerMixin):
if self.agent_config.name:
span.set_attribute("agent_name", self.agent_config.name)
await self._initialize_tools(request.toolgroups)
await self._initialize_tools(request.tools)
async for chunk in self._run_turn(request, turn_id):
yield chunk
@ -682,12 +688,11 @@ class ChatAgent(ShieldRunnerMixin):
# Build a map of custom tools to their definitions for faster lookup
client_tools: dict[str, OpenAIResponseInputTool | ToolDef] = {}
if self.agent_config.client_tools:
for tool in self.agent_config.client_tools:
if isinstance(tool, ToolDef) and tool.name:
client_tools[tool.name] = tool
elif getattr(tool, "type", None) == "function" and getattr(tool, "name", None):
client_tools[tool.name] = tool
for tool in self.client_tools_config or []:
if isinstance(tool, ToolDef) and tool.name:
client_tools[tool.name] = tool
elif getattr(tool, "type", None) == "function" and getattr(tool, "name", None):
client_tools[tool.name] = tool
while True:
step_id = str(uuid.uuid4())
inference_start_time = datetime.now(UTC).isoformat()
@ -987,91 +992,124 @@ class ChatAgent(ShieldRunnerMixin):
async def _initialize_tools(
self,
toolgroups_for_turn: list[AgentToolGroup] | None = None,
tools_for_turn: list[OpenAIResponseInputTool] | None = None,
) -> None:
toolgroup_to_args = {}
for toolgroup in (self.agent_config.toolgroups or []) + (toolgroups_for_turn or []):
if isinstance(toolgroup, AgentToolGroupWithArgs):
tool_group_name, _ = self._parse_toolgroup_name(toolgroup.name)
toolgroup_to_args[tool_group_name] = toolgroup.args
# Determine which tools to include
tool_groups_to_include = toolgroups_for_turn or self.agent_config.toolgroups or []
agent_config_toolgroups = []
for toolgroup in tool_groups_to_include:
name = toolgroup.name if isinstance(toolgroup, AgentToolGroupWithArgs) else toolgroup
if name not in agent_config_toolgroups:
agent_config_toolgroups.append(name)
toolgroup_to_args = toolgroup_to_args or {}
tool_name_to_def: dict[str | BuiltinTool, ToolDefinition] = {}
tool_name_to_args: dict[str | BuiltinTool, dict[str, Any]] = {}
client_tools_map: dict[str, OpenAIResponseInputTool | ToolDef] = {}
def add_tool_definition(identifier: str | BuiltinTool, tool_definition: ToolDefinition) -> None:
if identifier in tool_name_to_def:
raise ValueError(f"Tool {identifier} already exists")
tool_name_to_def[identifier] = tool_definition
def add_client_tool(tool: OpenAIResponseInputTool | ToolDef) -> None:
name = getattr(tool, "name", None)
if isinstance(tool, ToolDef):
name = tool.name
if not name:
raise ValueError("Client tools must have a name")
if name not in client_tools_map:
client_tools_map[name] = tool
tool_definition = _client_tool_to_tool_definition(tool)
add_tool_definition(tool_definition.tool_name, tool_definition)
if self.agent_config.client_tools:
for tool in self.agent_config.client_tools:
tool_definition = _client_tool_to_tool_definition(tool)
if tool_name_to_def.get(tool_definition.tool_name):
raise ValueError(f"Tool {tool_definition.tool_name} already exists")
tool_name_to_def[tool_definition.tool_name] = tool_definition
for toolgroup_name_with_maybe_tool_name in agent_config_toolgroups:
toolgroup_name, input_tool_name = self._parse_toolgroup_name(toolgroup_name_with_maybe_tool_name)
tools = await self.tool_groups_api.list_tools(toolgroup_id=toolgroup_name)
if not tools.data:
available_tool_groups = ", ".join(
[t.identifier for t in (await self.tool_groups_api.list_tool_groups()).data]
)
raise ValueError(f"Toolgroup {toolgroup_name} not found, available toolgroups: {available_tool_groups}")
if input_tool_name is not None and not any(tool.name == input_tool_name for tool in tools.data):
raise ValueError(
f"Tool {input_tool_name} not found in toolgroup {toolgroup_name}. Available tools: {', '.join([tool.name for tool in tools.data])}"
)
add_client_tool(tool)
for tool_def in tools.data:
if toolgroup_name.startswith("builtin") and toolgroup_name != RAG_TOOL_GROUP:
identifier: str | BuiltinTool | None = tool_def.name
if identifier == "web_search":
identifier = BuiltinTool.brave_search
else:
identifier = BuiltinTool(identifier)
else:
# add if tool_name is unspecified or the tool_def identifier is the same as the tool_name
if input_tool_name in (None, tool_def.name):
identifier = tool_def.name
else:
identifier = None
effective_tools = tools_for_turn
if effective_tools is None:
effective_tools = self.agent_config.tools
if tool_name_to_def.get(identifier, None):
raise ValueError(f"Tool {identifier} already exists")
if identifier:
tool_name_to_def[identifier] = ToolDefinition(
for tool in effective_tools or []:
if isinstance(tool, OpenAIResponseInputToolFunction):
add_client_tool(tool)
continue
resolved_tools = await self._resolve_response_tool(tool)
for identifier, definition, args in resolved_tools:
add_tool_definition(identifier, definition)
if args:
existing_args = tool_name_to_args.get(identifier, {})
tool_name_to_args[identifier] = {**existing_args, **args}
self.tool_defs = list(tool_name_to_def.values())
self.tool_name_to_args = tool_name_to_args
self.client_tools_config = list(client_tools_map.values())
async def _resolve_response_tool(
self,
tool: OpenAIResponseInputTool,
) -> list[tuple[str | BuiltinTool, ToolDefinition, dict[str, Any]]]:
if isinstance(tool, OpenAIResponseInputToolWebSearch):
tool_def = await self.tool_groups_api.get_tool(WEB_SEARCH_TOOL)
if tool_def is None:
raise ValueError("web_search tool is not registered")
identifier: str | BuiltinTool = BuiltinTool.brave_search
return [
(
identifier,
ToolDefinition(
tool_name=identifier,
description=tool_def.description,
input_schema=tool_def.input_schema,
),
{},
)
]
if isinstance(tool, OpenAIResponseInputToolFileSearch):
tool_def = await self.tool_groups_api.get_tool(MEMORY_QUERY_TOOL)
if tool_def is None:
raise ValueError("knowledge_search tool is not registered")
args: dict[str, Any] = {
"vector_db_ids": tool.vector_store_ids,
}
if tool.filters is not None:
args["filters"] = tool.filters
if tool.max_num_results is not None:
args["max_num_results"] = tool.max_num_results
if tool.ranking_options is not None:
args["ranking_options"] = tool.ranking_options.model_dump()
return [
(
tool_def.name,
ToolDefinition(
tool_name=tool_def.name,
description=tool_def.description,
input_schema=tool_def.input_schema,
),
args,
)
]
if isinstance(tool, OpenAIResponseInputToolMCP):
toolgroup_id = tool.server_label
if not toolgroup_id.startswith("mcp::"):
toolgroup_id = f"mcp::{toolgroup_id}"
tools = await self.tool_groups_api.list_tools(toolgroup_id=toolgroup_id)
if not tools.data:
raise ValueError(
f"No tools registered for MCP server '{tool.server_label}'. Ensure the toolgroup '{toolgroup_id}' is registered."
)
resolved: list[tuple[str | BuiltinTool, ToolDefinition, dict[str, Any]]] = []
for tool_def in tools.data:
resolved.append(
(
tool_def.name,
ToolDefinition(
tool_name=tool_def.name,
description=tool_def.description,
input_schema=tool_def.input_schema,
),
{},
)
tool_name_to_args[identifier] = toolgroup_to_args.get(toolgroup_name, {})
)
return resolved
self.tool_defs, self.tool_name_to_args = (
list(tool_name_to_def.values()),
tool_name_to_args,
)
def _parse_toolgroup_name(self, toolgroup_name_with_maybe_tool_name: str) -> tuple[str, str | None]:
"""Parse a toolgroup name into its components.
Args:
toolgroup_name: The toolgroup name to parse (e.g. "builtin::rag/knowledge_search")
Returns:
A tuple of (tool_type, tool_group, tool_name)
"""
split_names = toolgroup_name_with_maybe_tool_name.split("/")
if len(split_names) == 2:
# e.g. "builtin::rag"
tool_group, tool_name = split_names
else:
tool_group, tool_name = split_names[0], None
return tool_group, tool_name
raise ValueError(f"Unsupported tool type '{getattr(tool, 'type', None)}' in agent configuration")
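To make the new resolution path concrete: a file_search input tool, for example, resolves to the registered knowledge_search tool plus per-invocation arguments. A sketch with an illustrative vector store id:

from llama_stack.apis.agents.openai_responses import OpenAIResponseInputToolFileSearch

tool = OpenAIResponseInputToolFileSearch(
    type="file_search",
    vector_store_ids=["vs_demo"],  # illustrative id
    max_num_results=5,
)
# _resolve_response_tool(tool) returns a single entry: the identifier
# "knowledge_search", its ToolDefinition, and the args
# {"vector_db_ids": ["vs_demo"], "max_num_results": 5}, which get merged
# into tool_name_to_args for every knowledge_search invocation.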
async def execute_tool_call_maybe(
self,


@ -15,7 +15,6 @@ from llama_stack.apis.agents import (
Agents,
AgentSessionCreateResponse,
AgentStepResponse,
AgentToolGroup,
AgentTurnCreateRequest,
AgentTurnResumeRequest,
Document,
@ -32,9 +31,9 @@ from llama_stack.apis.agents.openai_responses import OpenAIResponseText
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.inference import (
Inference,
ToolConfig,
OpenAIMessageParam,
OpenAIToolMessageParam,
ToolConfig,
)
from llama_stack.apis.safety import Safety
from llama_stack.apis.tools import ToolGroups, ToolRuntime
@ -156,7 +155,7 @@ class MetaReferenceAgentsImpl(Agents):
agent_id: str,
session_id: str,
messages: list[OpenAIMessageParam],
toolgroups: list[AgentToolGroup] | None = None,
tools: list[OpenAIResponseInputTool] | None = None,
documents: list[Document] | None = None,
stream: bool | None = False,
tool_config: ToolConfig | None = None,
@ -166,7 +165,7 @@ class MetaReferenceAgentsImpl(Agents):
session_id=session_id,
messages=messages,
stream=True,
toolgroups=toolgroups,
tools=tools,
documents=documents,
tool_config=tool_config,
)


@ -12,12 +12,8 @@ import requests
from llama_stack_client import Agent, AgentEventLogger, Document
from llama_stack_client.types.shared_params.agent_config import AgentConfig, ToolConfig
from llama_stack.apis.agents.agents import (
AgentConfig as Server__AgentConfig,
)
from llama_stack.apis.agents.agents import (
ToolChoice,
)
from llama_stack.apis.agents.agents import AgentConfig as Server__AgentConfig
from llama_stack.apis.inference import ToolChoice
def get_boiling_point(liquid_name: str, celcius: bool = True) -> int:
@ -187,7 +183,7 @@ def test_tool_config(agent_config):
temperature=1.0,
top_p=0.9,
max_output_tokens=512,
toolgroups=[],
tools=[],
enable_session_persistence=False,
)
agent_config = AgentConfig(


@ -74,7 +74,13 @@ def sample_agent_config():
max_output_tokens=0,
input_shields=["string"],
output_shields=["string"],
toolgroups=["mcp::my_mcp_server"],
tools=[
{
"type": "mcp",
"server_label": "my_mcp_server",
"server_url": "http://example.com/mcp",
}
],
client_tools=[
{
"type": "function",