mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-12 20:12:33 +00:00
refactor(agent): drop AgentToolGroup for responses tools
This commit is contained in:
parent
c56b2deb7d
commit
ce44b9d6f6
12 changed files with 4051 additions and 4225 deletions
2369
docs/static/deprecated-llama-stack-spec.html
vendored
2369
docs/static/deprecated-llama-stack-spec.html
vendored
File diff suppressed because it is too large
Load diff
1780
docs/static/deprecated-llama-stack-spec.yaml
vendored
1780
docs/static/deprecated-llama-stack-spec.yaml
vendored
File diff suppressed because it is too large
Load diff
1623
docs/static/experimental-llama-stack-spec.html
vendored
1623
docs/static/experimental-llama-stack-spec.html
vendored
File diff suppressed because it is too large
Load diff
1240
docs/static/experimental-llama-stack-spec.yaml
vendored
1240
docs/static/experimental-llama-stack-spec.yaml
vendored
File diff suppressed because it is too large
Load diff
569
docs/static/stainless-llama-stack-spec.html
vendored
569
docs/static/stainless-llama-stack-spec.html
vendored
|
|
@ -15576,8 +15576,20 @@
|
||||||
"AgentConfig": {
|
"AgentConfig": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"sampling_params": {
|
"max_output_tokens": {
|
||||||
"$ref": "#/components/schemas/SamplingParams"
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"temperature": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"top_p": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"stop": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"input_shields": {
|
"input_shields": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
|
|
@ -15591,40 +15603,25 @@
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"toolgroups": {
|
"tools": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AgentTool"
|
"$ref": "#/components/schemas/OpenAIResponseInputTool"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"client_tools": {
|
"client_tools": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/ToolDef"
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/OpenAIResponseInputTool"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/ToolDef"
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tool_choice": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"auto",
|
|
||||||
"required",
|
|
||||||
"none"
|
|
||||||
],
|
|
||||||
"title": "ToolChoice",
|
|
||||||
"description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.",
|
|
||||||
"deprecated": true
|
|
||||||
},
|
|
||||||
"tool_prompt_format": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"json",
|
|
||||||
"function_tag",
|
|
||||||
"python_list"
|
|
||||||
],
|
|
||||||
"title": "ToolPromptFormat",
|
|
||||||
"description": "Prompt format for calling custom / zero shot tools.",
|
|
||||||
"deprecated": true
|
|
||||||
},
|
|
||||||
"tool_config": {
|
"tool_config": {
|
||||||
"$ref": "#/components/schemas/ToolConfig"
|
"$ref": "#/components/schemas/ToolConfig"
|
||||||
},
|
},
|
||||||
|
|
@ -15650,7 +15647,7 @@
|
||||||
"description": "Optional flag indicating whether session data has to be persisted"
|
"description": "Optional flag indicating whether session data has to be persisted"
|
||||||
},
|
},
|
||||||
"response_format": {
|
"response_format": {
|
||||||
"$ref": "#/components/schemas/ResponseFormat",
|
"$ref": "#/components/schemas/OpenAIResponseFormatParam",
|
||||||
"description": "Optional response format configuration"
|
"description": "Optional response format configuration"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
@ -15662,232 +15659,6 @@
|
||||||
"title": "AgentConfig",
|
"title": "AgentConfig",
|
||||||
"description": "Configuration for an agent."
|
"description": "Configuration for an agent."
|
||||||
},
|
},
|
||||||
"AgentTool": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"name": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"args": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "null"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "array"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"name",
|
|
||||||
"args"
|
|
||||||
],
|
|
||||||
"title": "AgentToolGroupWithArgs"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"GrammarResponseFormat": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"json_schema",
|
|
||||||
"grammar"
|
|
||||||
],
|
|
||||||
"description": "Must be \"grammar\" to identify this format type",
|
|
||||||
"const": "grammar",
|
|
||||||
"default": "grammar"
|
|
||||||
},
|
|
||||||
"bnf": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "null"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "array"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"description": "The BNF grammar specification the response should conform to"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type",
|
|
||||||
"bnf"
|
|
||||||
],
|
|
||||||
"title": "GrammarResponseFormat",
|
|
||||||
"description": "Configuration for grammar-guided response generation."
|
|
||||||
},
|
|
||||||
"GreedySamplingStrategy": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "greedy",
|
|
||||||
"default": "greedy",
|
|
||||||
"description": "Must be \"greedy\" to identify this sampling strategy"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type"
|
|
||||||
],
|
|
||||||
"title": "GreedySamplingStrategy",
|
|
||||||
"description": "Greedy sampling strategy that selects the highest probability token at each step."
|
|
||||||
},
|
|
||||||
"JsonSchemaResponseFormat": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"json_schema",
|
|
||||||
"grammar"
|
|
||||||
],
|
|
||||||
"description": "Must be \"json_schema\" to identify this format type",
|
|
||||||
"const": "json_schema",
|
|
||||||
"default": "json_schema"
|
|
||||||
},
|
|
||||||
"json_schema": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "null"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "array"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"description": "The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model."
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type",
|
|
||||||
"json_schema"
|
|
||||||
],
|
|
||||||
"title": "JsonSchemaResponseFormat",
|
|
||||||
"description": "Configuration for JSON schema-guided response generation."
|
|
||||||
},
|
|
||||||
"ResponseFormat": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/JsonSchemaResponseFormat"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/GrammarResponseFormat"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"discriminator": {
|
|
||||||
"propertyName": "type",
|
|
||||||
"mapping": {
|
|
||||||
"json_schema": "#/components/schemas/JsonSchemaResponseFormat",
|
|
||||||
"grammar": "#/components/schemas/GrammarResponseFormat"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"SamplingParams": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"strategy": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/GreedySamplingStrategy"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/TopPSamplingStrategy"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/TopKSamplingStrategy"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"discriminator": {
|
|
||||||
"propertyName": "type",
|
|
||||||
"mapping": {
|
|
||||||
"greedy": "#/components/schemas/GreedySamplingStrategy",
|
|
||||||
"top_p": "#/components/schemas/TopPSamplingStrategy",
|
|
||||||
"top_k": "#/components/schemas/TopKSamplingStrategy"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"description": "The sampling strategy."
|
|
||||||
},
|
|
||||||
"max_tokens": {
|
|
||||||
"type": "integer",
|
|
||||||
"default": 0,
|
|
||||||
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
|
|
||||||
},
|
|
||||||
"repetition_penalty": {
|
|
||||||
"type": "number",
|
|
||||||
"default": 1.0,
|
|
||||||
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics."
|
|
||||||
},
|
|
||||||
"stop": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"description": "Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"strategy"
|
|
||||||
],
|
|
||||||
"title": "SamplingParams",
|
|
||||||
"description": "Sampling parameters."
|
|
||||||
},
|
|
||||||
"ToolConfig": {
|
"ToolConfig": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
@ -15933,54 +15704,6 @@
|
||||||
"title": "ToolConfig",
|
"title": "ToolConfig",
|
||||||
"description": "Configuration for tool use."
|
"description": "Configuration for tool use."
|
||||||
},
|
},
|
||||||
"TopKSamplingStrategy": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "top_k",
|
|
||||||
"default": "top_k",
|
|
||||||
"description": "Must be \"top_k\" to identify this sampling strategy"
|
|
||||||
},
|
|
||||||
"top_k": {
|
|
||||||
"type": "integer",
|
|
||||||
"description": "Number of top tokens to consider for sampling. Must be at least 1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type",
|
|
||||||
"top_k"
|
|
||||||
],
|
|
||||||
"title": "TopKSamplingStrategy",
|
|
||||||
"description": "Top-k sampling strategy that restricts sampling to the k most likely tokens."
|
|
||||||
},
|
|
||||||
"TopPSamplingStrategy": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "top_p",
|
|
||||||
"default": "top_p",
|
|
||||||
"description": "Must be \"top_p\" to identify this sampling strategy"
|
|
||||||
},
|
|
||||||
"temperature": {
|
|
||||||
"type": "number",
|
|
||||||
"description": "Controls randomness in sampling. Higher values increase randomness"
|
|
||||||
},
|
|
||||||
"top_p": {
|
|
||||||
"type": "number",
|
|
||||||
"default": 0.95,
|
|
||||||
"description": "Cumulative probability threshold for nucleus sampling. Defaults to 0.95"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"type"
|
|
||||||
],
|
|
||||||
"title": "TopPSamplingStrategy",
|
|
||||||
"description": "Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p."
|
|
||||||
},
|
|
||||||
"CreateAgentRequest": {
|
"CreateAgentRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
@ -16100,8 +15823,11 @@
|
||||||
"default": "inference"
|
"default": "inference"
|
||||||
},
|
},
|
||||||
"model_response": {
|
"model_response": {
|
||||||
"$ref": "#/components/schemas/CompletionMessage",
|
"$ref": "#/components/schemas/OpenAIAssistantMessageParam",
|
||||||
"description": "The response from the LLM."
|
"description": "The response from the LLM."
|
||||||
|
},
|
||||||
|
"finish_reason": {
|
||||||
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
|
@ -16153,7 +15879,17 @@
|
||||||
"description": "The IDs of the vector databases to retrieve context from."
|
"description": "The IDs of the vector databases to retrieve context from."
|
||||||
},
|
},
|
||||||
"inserted_context": {
|
"inserted_context": {
|
||||||
"$ref": "#/components/schemas/InterleavedContent",
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"description": "The context retrieved from the vector databases."
|
"description": "The context retrieved from the vector databases."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
@ -16287,14 +16023,14 @@
|
||||||
"tool_calls": {
|
"tool_calls": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/ToolCall"
|
"$ref": "#/components/schemas/OpenAIChatCompletionToolCall"
|
||||||
},
|
},
|
||||||
"description": "The tool calls to execute."
|
"description": "The tool calls to execute."
|
||||||
},
|
},
|
||||||
"tool_responses": {
|
"tool_responses": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/ToolResponse"
|
"$ref": "#/components/schemas/OpenAIToolMessageParam"
|
||||||
},
|
},
|
||||||
"description": "The tool responses from the tool calls."
|
"description": "The tool responses from the tool calls."
|
||||||
}
|
}
|
||||||
|
|
@ -16310,71 +16046,6 @@
|
||||||
"title": "ToolExecutionStep",
|
"title": "ToolExecutionStep",
|
||||||
"description": "A tool execution step in an agent turn."
|
"description": "A tool execution step in an agent turn."
|
||||||
},
|
},
|
||||||
"ToolResponse": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"call_id": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "Unique identifier for the tool call this response is for"
|
|
||||||
},
|
|
||||||
"tool_name": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"brave_search",
|
|
||||||
"wolfram_alpha",
|
|
||||||
"photogen",
|
|
||||||
"code_interpreter"
|
|
||||||
],
|
|
||||||
"title": "BuiltinTool"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"description": "Name of the tool that was invoked"
|
|
||||||
},
|
|
||||||
"content": {
|
|
||||||
"$ref": "#/components/schemas/InterleavedContent",
|
|
||||||
"description": "The response content from the tool"
|
|
||||||
},
|
|
||||||
"metadata": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "null"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "array"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"description": "(Optional) Additional metadata about the tool response"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"call_id",
|
|
||||||
"tool_name",
|
|
||||||
"content"
|
|
||||||
],
|
|
||||||
"title": "ToolResponse",
|
|
||||||
"description": "Response from a tool invocation."
|
|
||||||
},
|
|
||||||
"Turn": {
|
"Turn": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
@ -16389,14 +16060,7 @@
|
||||||
"input_messages": {
|
"input_messages": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"oneOf": [
|
"$ref": "#/components/schemas/OpenAIMessageParam"
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/UserMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/ToolResponseMessage"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"description": "List of messages that initiated this turn"
|
"description": "List of messages that initiated this turn"
|
||||||
},
|
},
|
||||||
|
|
@ -16430,9 +16094,12 @@
|
||||||
"description": "Ordered list of processing steps executed during this turn"
|
"description": "Ordered list of processing steps executed during this turn"
|
||||||
},
|
},
|
||||||
"output_message": {
|
"output_message": {
|
||||||
"$ref": "#/components/schemas/CompletionMessage",
|
"$ref": "#/components/schemas/OpenAIAssistantMessageParam",
|
||||||
"description": "The model's generated response containing content and metadata"
|
"description": "The model's generated response containing content and metadata"
|
||||||
},
|
},
|
||||||
|
"finish_reason": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
"output_attachments": {
|
"output_attachments": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
|
|
@ -16443,13 +16110,10 @@
|
||||||
{
|
{
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/InterleavedContentItem"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/InterleavedContentItem"
|
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -16502,14 +16166,7 @@
|
||||||
"messages": {
|
"messages": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"oneOf": [
|
"$ref": "#/components/schemas/OpenAIMessageParam"
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/UserMessage"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/ToolResponseMessage"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"description": "List of messages to start the turn with."
|
"description": "List of messages to start the turn with."
|
||||||
},
|
},
|
||||||
|
|
@ -16527,13 +16184,10 @@
|
||||||
{
|
{
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/InterleavedContentItem"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/InterleavedContentItem"
|
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -16557,12 +16211,12 @@
|
||||||
},
|
},
|
||||||
"description": "(Optional) List of documents to create the turn with."
|
"description": "(Optional) List of documents to create the turn with."
|
||||||
},
|
},
|
||||||
"toolgroups": {
|
"tools": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AgentTool"
|
"$ref": "#/components/schemas/OpenAIResponseInputTool"
|
||||||
},
|
},
|
||||||
"description": "(Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request."
|
"description": "(Optional) List of tools to create the turn with, will be used in addition to the agent's config tools for the request."
|
||||||
},
|
},
|
||||||
"tool_config": {
|
"tool_config": {
|
||||||
"$ref": "#/components/schemas/ToolConfig",
|
"$ref": "#/components/schemas/ToolConfig",
|
||||||
|
|
@ -17015,7 +16669,7 @@
|
||||||
"tool_responses": {
|
"tool_responses": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/ToolResponse"
|
"$ref": "#/components/schemas/OpenAIToolMessageParam"
|
||||||
},
|
},
|
||||||
"description": "The tool call responses to resume the turn with."
|
"description": "The tool call responses to resume the turn with."
|
||||||
},
|
},
|
||||||
|
|
@ -17285,6 +16939,23 @@
|
||||||
"title": "BenchmarkConfig",
|
"title": "BenchmarkConfig",
|
||||||
"description": "A benchmark configuration for evaluation."
|
"description": "A benchmark configuration for evaluation."
|
||||||
},
|
},
|
||||||
|
"GreedySamplingStrategy": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "greedy",
|
||||||
|
"default": "greedy",
|
||||||
|
"description": "Must be \"greedy\" to identify this sampling strategy"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type"
|
||||||
|
],
|
||||||
|
"title": "GreedySamplingStrategy",
|
||||||
|
"description": "Greedy sampling strategy that selects the highest probability token at each step."
|
||||||
|
},
|
||||||
"ModelCandidate": {
|
"ModelCandidate": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
@ -17315,6 +16986,104 @@
|
||||||
"title": "ModelCandidate",
|
"title": "ModelCandidate",
|
||||||
"description": "A model candidate for evaluation."
|
"description": "A model candidate for evaluation."
|
||||||
},
|
},
|
||||||
|
"SamplingParams": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"strategy": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/GreedySamplingStrategy"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/TopPSamplingStrategy"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/components/schemas/TopKSamplingStrategy"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"discriminator": {
|
||||||
|
"propertyName": "type",
|
||||||
|
"mapping": {
|
||||||
|
"greedy": "#/components/schemas/GreedySamplingStrategy",
|
||||||
|
"top_p": "#/components/schemas/TopPSamplingStrategy",
|
||||||
|
"top_k": "#/components/schemas/TopKSamplingStrategy"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "The sampling strategy."
|
||||||
|
},
|
||||||
|
"max_tokens": {
|
||||||
|
"type": "integer",
|
||||||
|
"default": 0,
|
||||||
|
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
|
||||||
|
},
|
||||||
|
"repetition_penalty": {
|
||||||
|
"type": "number",
|
||||||
|
"default": 1.0,
|
||||||
|
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics."
|
||||||
|
},
|
||||||
|
"stop": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"description": "Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"strategy"
|
||||||
|
],
|
||||||
|
"title": "SamplingParams",
|
||||||
|
"description": "Sampling parameters."
|
||||||
|
},
|
||||||
|
"TopKSamplingStrategy": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "top_k",
|
||||||
|
"default": "top_k",
|
||||||
|
"description": "Must be \"top_k\" to identify this sampling strategy"
|
||||||
|
},
|
||||||
|
"top_k": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Number of top tokens to consider for sampling. Must be at least 1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type",
|
||||||
|
"top_k"
|
||||||
|
],
|
||||||
|
"title": "TopKSamplingStrategy",
|
||||||
|
"description": "Top-k sampling strategy that restricts sampling to the k most likely tokens."
|
||||||
|
},
|
||||||
|
"TopPSamplingStrategy": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "top_p",
|
||||||
|
"default": "top_p",
|
||||||
|
"description": "Must be \"top_p\" to identify this sampling strategy"
|
||||||
|
},
|
||||||
|
"temperature": {
|
||||||
|
"type": "number",
|
||||||
|
"description": "Controls randomness in sampling. Higher values increase randomness"
|
||||||
|
},
|
||||||
|
"top_p": {
|
||||||
|
"type": "number",
|
||||||
|
"default": 0.95,
|
||||||
|
"description": "Cumulative probability threshold for nucleus sampling. Defaults to 0.95"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type"
|
||||||
|
],
|
||||||
|
"title": "TopPSamplingStrategy",
|
||||||
|
"description": "Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p."
|
||||||
|
},
|
||||||
"EvaluateRowsRequest": {
|
"EvaluateRowsRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
|
||||||
424
docs/static/stainless-llama-stack-spec.yaml
vendored
424
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -11663,8 +11663,16 @@ components:
|
||||||
AgentConfig:
|
AgentConfig:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
sampling_params:
|
max_output_tokens:
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
type: integer
|
||||||
|
temperature:
|
||||||
|
type: number
|
||||||
|
top_p:
|
||||||
|
type: number
|
||||||
|
stop:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
input_shields:
|
input_shields:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
|
|
@ -11673,36 +11681,16 @@ components:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
toolgroups:
|
tools:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AgentTool'
|
$ref: '#/components/schemas/OpenAIResponseInputTool'
|
||||||
client_tools:
|
client_tools:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/ToolDef'
|
oneOf:
|
||||||
tool_choice:
|
- $ref: '#/components/schemas/OpenAIResponseInputTool'
|
||||||
type: string
|
- $ref: '#/components/schemas/ToolDef'
|
||||||
enum:
|
|
||||||
- auto
|
|
||||||
- required
|
|
||||||
- none
|
|
||||||
title: ToolChoice
|
|
||||||
description: >-
|
|
||||||
Whether tool use is required or automatic. This is a hint to the model
|
|
||||||
which may not be followed. It depends on the Instruction Following capabilities
|
|
||||||
of the model.
|
|
||||||
deprecated: true
|
|
||||||
tool_prompt_format:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- json
|
|
||||||
- function_tag
|
|
||||||
- python_list
|
|
||||||
title: ToolPromptFormat
|
|
||||||
description: >-
|
|
||||||
Prompt format for calling custom / zero shot tools.
|
|
||||||
deprecated: true
|
|
||||||
tool_config:
|
tool_config:
|
||||||
$ref: '#/components/schemas/ToolConfig'
|
$ref: '#/components/schemas/ToolConfig'
|
||||||
max_infer_iters:
|
max_infer_iters:
|
||||||
|
|
@ -11725,7 +11713,7 @@ components:
|
||||||
description: >-
|
description: >-
|
||||||
Optional flag indicating whether session data has to be persisted
|
Optional flag indicating whether session data has to be persisted
|
||||||
response_format:
|
response_format:
|
||||||
$ref: '#/components/schemas/ResponseFormat'
|
$ref: '#/components/schemas/OpenAIResponseFormatParam'
|
||||||
description: Optional response format configuration
|
description: Optional response format configuration
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
|
|
@ -11733,157 +11721,6 @@ components:
|
||||||
- instructions
|
- instructions
|
||||||
title: AgentConfig
|
title: AgentConfig
|
||||||
description: Configuration for an agent.
|
description: Configuration for an agent.
|
||||||
AgentTool:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
- type: object
|
|
||||||
properties:
|
|
||||||
name:
|
|
||||||
type: string
|
|
||||||
args:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- name
|
|
||||||
- args
|
|
||||||
title: AgentToolGroupWithArgs
|
|
||||||
GrammarResponseFormat:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- json_schema
|
|
||||||
- grammar
|
|
||||||
description: >-
|
|
||||||
Must be "grammar" to identify this format type
|
|
||||||
const: grammar
|
|
||||||
default: grammar
|
|
||||||
bnf:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
|
||||||
The BNF grammar specification the response should conform to
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
- bnf
|
|
||||||
title: GrammarResponseFormat
|
|
||||||
description: >-
|
|
||||||
Configuration for grammar-guided response generation.
|
|
||||||
GreedySamplingStrategy:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
type: string
|
|
||||||
const: greedy
|
|
||||||
default: greedy
|
|
||||||
description: >-
|
|
||||||
Must be "greedy" to identify this sampling strategy
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
title: GreedySamplingStrategy
|
|
||||||
description: >-
|
|
||||||
Greedy sampling strategy that selects the highest probability token at each
|
|
||||||
step.
|
|
||||||
JsonSchemaResponseFormat:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- json_schema
|
|
||||||
- grammar
|
|
||||||
description: >-
|
|
||||||
Must be "json_schema" to identify this format type
|
|
||||||
const: json_schema
|
|
||||||
default: json_schema
|
|
||||||
json_schema:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
|
||||||
The JSON schema the response should conform to. In a Python SDK, this
|
|
||||||
is often a `pydantic` model.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
- json_schema
|
|
||||||
title: JsonSchemaResponseFormat
|
|
||||||
description: >-
|
|
||||||
Configuration for JSON schema-guided response generation.
|
|
||||||
ResponseFormat:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/JsonSchemaResponseFormat'
|
|
||||||
- $ref: '#/components/schemas/GrammarResponseFormat'
|
|
||||||
discriminator:
|
|
||||||
propertyName: type
|
|
||||||
mapping:
|
|
||||||
json_schema: '#/components/schemas/JsonSchemaResponseFormat'
|
|
||||||
grammar: '#/components/schemas/GrammarResponseFormat'
|
|
||||||
SamplingParams:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
strategy:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/GreedySamplingStrategy'
|
|
||||||
- $ref: '#/components/schemas/TopPSamplingStrategy'
|
|
||||||
- $ref: '#/components/schemas/TopKSamplingStrategy'
|
|
||||||
discriminator:
|
|
||||||
propertyName: type
|
|
||||||
mapping:
|
|
||||||
greedy: '#/components/schemas/GreedySamplingStrategy'
|
|
||||||
top_p: '#/components/schemas/TopPSamplingStrategy'
|
|
||||||
top_k: '#/components/schemas/TopKSamplingStrategy'
|
|
||||||
description: The sampling strategy.
|
|
||||||
max_tokens:
|
|
||||||
type: integer
|
|
||||||
default: 0
|
|
||||||
description: >-
|
|
||||||
The maximum number of tokens that can be generated in the completion.
|
|
||||||
The token count of your prompt plus max_tokens cannot exceed the model's
|
|
||||||
context length.
|
|
||||||
repetition_penalty:
|
|
||||||
type: number
|
|
||||||
default: 1.0
|
|
||||||
description: >-
|
|
||||||
Number between -2.0 and 2.0. Positive values penalize new tokens based
|
|
||||||
on whether they appear in the text so far, increasing the model's likelihood
|
|
||||||
to talk about new topics.
|
|
||||||
stop:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
Up to 4 sequences where the API will stop generating further tokens. The
|
|
||||||
returned text will not contain the stop sequence.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- strategy
|
|
||||||
title: SamplingParams
|
|
||||||
description: Sampling parameters.
|
|
||||||
ToolConfig:
|
ToolConfig:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -11932,51 +11769,6 @@ components:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
title: ToolConfig
|
title: ToolConfig
|
||||||
description: Configuration for tool use.
|
description: Configuration for tool use.
|
||||||
TopKSamplingStrategy:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
type: string
|
|
||||||
const: top_k
|
|
||||||
default: top_k
|
|
||||||
description: >-
|
|
||||||
Must be "top_k" to identify this sampling strategy
|
|
||||||
top_k:
|
|
||||||
type: integer
|
|
||||||
description: >-
|
|
||||||
Number of top tokens to consider for sampling. Must be at least 1
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
- top_k
|
|
||||||
title: TopKSamplingStrategy
|
|
||||||
description: >-
|
|
||||||
Top-k sampling strategy that restricts sampling to the k most likely tokens.
|
|
||||||
TopPSamplingStrategy:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
type: string
|
|
||||||
const: top_p
|
|
||||||
default: top_p
|
|
||||||
description: >-
|
|
||||||
Must be "top_p" to identify this sampling strategy
|
|
||||||
temperature:
|
|
||||||
type: number
|
|
||||||
description: >-
|
|
||||||
Controls randomness in sampling. Higher values increase randomness
|
|
||||||
top_p:
|
|
||||||
type: number
|
|
||||||
default: 0.95
|
|
||||||
description: >-
|
|
||||||
Cumulative probability threshold for nucleus sampling. Defaults to 0.95
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
title: TopPSamplingStrategy
|
|
||||||
description: >-
|
|
||||||
Top-p (nucleus) sampling strategy that samples from the smallest set of tokens
|
|
||||||
with cumulative probability >= p.
|
|
||||||
CreateAgentRequest:
|
CreateAgentRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -12072,8 +11864,10 @@ components:
|
||||||
const: inference
|
const: inference
|
||||||
default: inference
|
default: inference
|
||||||
model_response:
|
model_response:
|
||||||
$ref: '#/components/schemas/CompletionMessage'
|
$ref: '#/components/schemas/OpenAIAssistantMessageParam'
|
||||||
description: The response from the LLM.
|
description: The response from the LLM.
|
||||||
|
finish_reason:
|
||||||
|
type: string
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- turn_id
|
- turn_id
|
||||||
|
|
@ -12115,7 +11909,11 @@ components:
|
||||||
description: >-
|
description: >-
|
||||||
The IDs of the vector databases to retrieve context from.
|
The IDs of the vector databases to retrieve context from.
|
||||||
inserted_context:
|
inserted_context:
|
||||||
$ref: '#/components/schemas/InterleavedContent'
|
oneOf:
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||||
description: >-
|
description: >-
|
||||||
The context retrieved from the vector databases.
|
The context retrieved from the vector databases.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
|
|
@ -12226,12 +12024,12 @@ components:
|
||||||
tool_calls:
|
tool_calls:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/ToolCall'
|
$ref: '#/components/schemas/OpenAIChatCompletionToolCall'
|
||||||
description: The tool calls to execute.
|
description: The tool calls to execute.
|
||||||
tool_responses:
|
tool_responses:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/ToolResponse'
|
$ref: '#/components/schemas/OpenAIToolMessageParam'
|
||||||
description: The tool responses from the tool calls.
|
description: The tool responses from the tool calls.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
|
|
@ -12242,46 +12040,6 @@ components:
|
||||||
- tool_responses
|
- tool_responses
|
||||||
title: ToolExecutionStep
|
title: ToolExecutionStep
|
||||||
description: A tool execution step in an agent turn.
|
description: A tool execution step in an agent turn.
|
||||||
ToolResponse:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
call_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
Unique identifier for the tool call this response is for
|
|
||||||
tool_name:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
enum:
|
|
||||||
- brave_search
|
|
||||||
- wolfram_alpha
|
|
||||||
- photogen
|
|
||||||
- code_interpreter
|
|
||||||
title: BuiltinTool
|
|
||||||
- type: string
|
|
||||||
description: Name of the tool that was invoked
|
|
||||||
content:
|
|
||||||
$ref: '#/components/schemas/InterleavedContent'
|
|
||||||
description: The response content from the tool
|
|
||||||
metadata:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: >-
|
|
||||||
(Optional) Additional metadata about the tool response
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- call_id
|
|
||||||
- tool_name
|
|
||||||
- content
|
|
||||||
title: ToolResponse
|
|
||||||
description: Response from a tool invocation.
|
|
||||||
Turn:
|
Turn:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -12296,9 +12054,7 @@ components:
|
||||||
input_messages:
|
input_messages:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
oneOf:
|
$ref: '#/components/schemas/OpenAIMessageParam'
|
||||||
- $ref: '#/components/schemas/UserMessage'
|
|
||||||
- $ref: '#/components/schemas/ToolResponseMessage'
|
|
||||||
description: >-
|
description: >-
|
||||||
List of messages that initiated this turn
|
List of messages that initiated this turn
|
||||||
steps:
|
steps:
|
||||||
|
|
@ -12319,9 +12075,11 @@ components:
|
||||||
description: >-
|
description: >-
|
||||||
Ordered list of processing steps executed during this turn
|
Ordered list of processing steps executed during this turn
|
||||||
output_message:
|
output_message:
|
||||||
$ref: '#/components/schemas/CompletionMessage'
|
$ref: '#/components/schemas/OpenAIAssistantMessageParam'
|
||||||
description: >-
|
description: >-
|
||||||
The model's generated response containing content and metadata
|
The model's generated response containing content and metadata
|
||||||
|
finish_reason:
|
||||||
|
type: string
|
||||||
output_attachments:
|
output_attachments:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
|
|
@ -12330,10 +12088,9 @@ components:
|
||||||
content:
|
content:
|
||||||
oneOf:
|
oneOf:
|
||||||
- type: string
|
- type: string
|
||||||
- $ref: '#/components/schemas/InterleavedContentItem'
|
|
||||||
- type: array
|
- type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/InterleavedContentItem'
|
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||||
- $ref: '#/components/schemas/URL'
|
- $ref: '#/components/schemas/URL'
|
||||||
description: The content of the attachment.
|
description: The content of the attachment.
|
||||||
mime_type:
|
mime_type:
|
||||||
|
|
@ -12373,9 +12130,7 @@ components:
|
||||||
messages:
|
messages:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
oneOf:
|
$ref: '#/components/schemas/OpenAIMessageParam'
|
||||||
- $ref: '#/components/schemas/UserMessage'
|
|
||||||
- $ref: '#/components/schemas/ToolResponseMessage'
|
|
||||||
description: List of messages to start the turn with.
|
description: List of messages to start the turn with.
|
||||||
stream:
|
stream:
|
||||||
type: boolean
|
type: boolean
|
||||||
|
|
@ -12390,10 +12145,9 @@ components:
|
||||||
content:
|
content:
|
||||||
oneOf:
|
oneOf:
|
||||||
- type: string
|
- type: string
|
||||||
- $ref: '#/components/schemas/InterleavedContentItem'
|
|
||||||
- type: array
|
- type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/InterleavedContentItem'
|
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||||
- $ref: '#/components/schemas/URL'
|
- $ref: '#/components/schemas/URL'
|
||||||
description: The content of the document.
|
description: The content of the document.
|
||||||
mime_type:
|
mime_type:
|
||||||
|
|
@ -12407,13 +12161,13 @@ components:
|
||||||
description: A document to be used by an agent.
|
description: A document to be used by an agent.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) List of documents to create the turn with.
|
(Optional) List of documents to create the turn with.
|
||||||
toolgroups:
|
tools:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AgentTool'
|
$ref: '#/components/schemas/OpenAIResponseInputTool'
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) List of toolgroups to create the turn with, will be used in
|
(Optional) List of tools to create the turn with, will be used in addition
|
||||||
addition to the agent's config toolgroups for the request.
|
to the agent's config tools for the request.
|
||||||
tool_config:
|
tool_config:
|
||||||
$ref: '#/components/schemas/ToolConfig'
|
$ref: '#/components/schemas/ToolConfig'
|
||||||
description: >-
|
description: >-
|
||||||
|
|
@ -12764,7 +12518,7 @@ components:
|
||||||
tool_responses:
|
tool_responses:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/ToolResponse'
|
$ref: '#/components/schemas/OpenAIToolMessageParam'
|
||||||
description: >-
|
description: >-
|
||||||
The tool call responses to resume the turn with.
|
The tool call responses to resume the turn with.
|
||||||
stream:
|
stream:
|
||||||
|
|
@ -12955,6 +12709,22 @@ components:
|
||||||
title: BenchmarkConfig
|
title: BenchmarkConfig
|
||||||
description: >-
|
description: >-
|
||||||
A benchmark configuration for evaluation.
|
A benchmark configuration for evaluation.
|
||||||
|
GreedySamplingStrategy:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: greedy
|
||||||
|
default: greedy
|
||||||
|
description: >-
|
||||||
|
Must be "greedy" to identify this sampling strategy
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
title: GreedySamplingStrategy
|
||||||
|
description: >-
|
||||||
|
Greedy sampling strategy that selects the highest probability token at each
|
||||||
|
step.
|
||||||
ModelCandidate:
|
ModelCandidate:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
@ -12980,6 +12750,92 @@ components:
|
||||||
- sampling_params
|
- sampling_params
|
||||||
title: ModelCandidate
|
title: ModelCandidate
|
||||||
description: A model candidate for evaluation.
|
description: A model candidate for evaluation.
|
||||||
|
SamplingParams:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
strategy:
|
||||||
|
oneOf:
|
||||||
|
- $ref: '#/components/schemas/GreedySamplingStrategy'
|
||||||
|
- $ref: '#/components/schemas/TopPSamplingStrategy'
|
||||||
|
- $ref: '#/components/schemas/TopKSamplingStrategy'
|
||||||
|
discriminator:
|
||||||
|
propertyName: type
|
||||||
|
mapping:
|
||||||
|
greedy: '#/components/schemas/GreedySamplingStrategy'
|
||||||
|
top_p: '#/components/schemas/TopPSamplingStrategy'
|
||||||
|
top_k: '#/components/schemas/TopKSamplingStrategy'
|
||||||
|
description: The sampling strategy.
|
||||||
|
max_tokens:
|
||||||
|
type: integer
|
||||||
|
default: 0
|
||||||
|
description: >-
|
||||||
|
The maximum number of tokens that can be generated in the completion.
|
||||||
|
The token count of your prompt plus max_tokens cannot exceed the model's
|
||||||
|
context length.
|
||||||
|
repetition_penalty:
|
||||||
|
type: number
|
||||||
|
default: 1.0
|
||||||
|
description: >-
|
||||||
|
Number between -2.0 and 2.0. Positive values penalize new tokens based
|
||||||
|
on whether they appear in the text so far, increasing the model's likelihood
|
||||||
|
to talk about new topics.
|
||||||
|
stop:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
Up to 4 sequences where the API will stop generating further tokens. The
|
||||||
|
returned text will not contain the stop sequence.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- strategy
|
||||||
|
title: SamplingParams
|
||||||
|
description: Sampling parameters.
|
||||||
|
TopKSamplingStrategy:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: top_k
|
||||||
|
default: top_k
|
||||||
|
description: >-
|
||||||
|
Must be "top_k" to identify this sampling strategy
|
||||||
|
top_k:
|
||||||
|
type: integer
|
||||||
|
description: >-
|
||||||
|
Number of top tokens to consider for sampling. Must be at least 1
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- top_k
|
||||||
|
title: TopKSamplingStrategy
|
||||||
|
description: >-
|
||||||
|
Top-k sampling strategy that restricts sampling to the k most likely tokens.
|
||||||
|
TopPSamplingStrategy:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: top_p
|
||||||
|
default: top_p
|
||||||
|
description: >-
|
||||||
|
Must be "top_p" to identify this sampling strategy
|
||||||
|
temperature:
|
||||||
|
type: number
|
||||||
|
description: >-
|
||||||
|
Controls randomness in sampling. Higher values increase randomness
|
||||||
|
top_p:
|
||||||
|
type: number
|
||||||
|
default: 0.95
|
||||||
|
description: >-
|
||||||
|
Cumulative probability threshold for nucleus sampling. Defaults to 0.95
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
title: TopPSamplingStrategy
|
||||||
|
description: >-
|
||||||
|
Top-p (nucleus) sampling strategy that samples from the smallest set of tokens
|
||||||
|
with cumulative probability >= p.
|
||||||
EvaluateRowsRequest:
|
EvaluateRowsRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
|
||||||
|
|
@ -20,9 +20,7 @@ from llama_stack.apis.inference import (
|
||||||
OpenAIMessageParam,
|
OpenAIMessageParam,
|
||||||
OpenAIResponseFormatParam,
|
OpenAIResponseFormatParam,
|
||||||
OpenAIToolMessageParam,
|
OpenAIToolMessageParam,
|
||||||
ToolChoice,
|
|
||||||
ToolConfig,
|
ToolConfig,
|
||||||
ToolPromptFormat,
|
|
||||||
)
|
)
|
||||||
from llama_stack.apis.safety import SafetyViolation
|
from llama_stack.apis.safety import SafetyViolation
|
||||||
from llama_stack.apis.tools import ToolDef
|
from llama_stack.apis.tools import ToolDef
|
||||||
|
|
@ -107,7 +105,6 @@ class StepType(StrEnum):
|
||||||
memory_retrieval = "memory_retrieval"
|
memory_retrieval = "memory_retrieval"
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class InferenceStep(StepCommon):
|
class InferenceStep(StepCommon):
|
||||||
"""An inference step in an agent turn.
|
"""An inference step in an agent turn.
|
||||||
|
|
@ -208,15 +205,6 @@ class Session(BaseModel):
|
||||||
started_at: datetime
|
started_at: datetime
|
||||||
|
|
||||||
|
|
||||||
class AgentToolGroupWithArgs(BaseModel):
|
|
||||||
name: str
|
|
||||||
args: dict[str, Any]
|
|
||||||
|
|
||||||
|
|
||||||
AgentToolGroup = str | AgentToolGroupWithArgs
|
|
||||||
register_schema(AgentToolGroup, name="AgentTool")
|
|
||||||
|
|
||||||
|
|
||||||
class AgentConfigCommon(BaseModel):
|
class AgentConfigCommon(BaseModel):
|
||||||
max_output_tokens: int | None = None
|
max_output_tokens: int | None = None
|
||||||
temperature: float | None = None
|
temperature: float | None = None
|
||||||
|
|
@ -225,7 +213,7 @@ class AgentConfigCommon(BaseModel):
|
||||||
|
|
||||||
input_shields: list[str] | None = Field(default_factory=list)
|
input_shields: list[str] | None = Field(default_factory=list)
|
||||||
output_shields: list[str] | None = Field(default_factory=list)
|
output_shields: list[str] | None = Field(default_factory=list)
|
||||||
toolgroups: list[AgentToolGroup] | None = Field(default_factory=list)
|
tools: list[OpenAIResponseInputTool] | None = Field(default_factory=list)
|
||||||
client_tools: list[OpenAIResponseInputTool | ToolDef] | None = Field(default_factory=list)
|
client_tools: list[OpenAIResponseInputTool | ToolDef] | None = Field(default_factory=list)
|
||||||
tool_config: ToolConfig | None = Field(default=None)
|
tool_config: ToolConfig | None = Field(default=None)
|
||||||
|
|
||||||
|
|
@ -419,7 +407,7 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
|
||||||
:param session_id: Unique identifier for the conversation session
|
:param session_id: Unique identifier for the conversation session
|
||||||
:param messages: List of messages to start the turn with
|
:param messages: List of messages to start the turn with
|
||||||
:param documents: (Optional) List of documents to provide to the agent
|
:param documents: (Optional) List of documents to provide to the agent
|
||||||
:param toolgroups: (Optional) List of tool groups to make available for this turn
|
:param tools: (Optional) List of tools to make available for this turn
|
||||||
:param stream: (Optional) Whether to stream the response
|
:param stream: (Optional) Whether to stream the response
|
||||||
:param tool_config: (Optional) Tool configuration to override agent defaults
|
:param tool_config: (Optional) Tool configuration to override agent defaults
|
||||||
"""
|
"""
|
||||||
|
|
@ -430,7 +418,7 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
|
||||||
messages: list[OpenAIMessageParam]
|
messages: list[OpenAIMessageParam]
|
||||||
|
|
||||||
documents: list[Document] | None = None
|
documents: list[Document] | None = None
|
||||||
toolgroups: list[AgentToolGroup] | None = Field(default_factory=lambda: [])
|
tools: list[OpenAIResponseInputTool] | None = Field(default_factory=lambda: [])
|
||||||
|
|
||||||
stream: bool | None = False
|
stream: bool | None = False
|
||||||
tool_config: ToolConfig | None = None
|
tool_config: ToolConfig | None = None
|
||||||
|
|
@ -524,7 +512,7 @@ class Agents(Protocol):
|
||||||
messages: list[OpenAIMessageParam],
|
messages: list[OpenAIMessageParam],
|
||||||
stream: bool | None = False,
|
stream: bool | None = False,
|
||||||
documents: list[Document] | None = None,
|
documents: list[Document] | None = None,
|
||||||
toolgroups: list[AgentToolGroup] | None = None,
|
tools: list[OpenAIResponseInputTool] | None = None,
|
||||||
tool_config: ToolConfig | None = None,
|
tool_config: ToolConfig | None = None,
|
||||||
) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]:
|
) -> Turn | AsyncIterator[AgentTurnResponseStreamChunk]:
|
||||||
"""Create a new turn for an agent.
|
"""Create a new turn for an agent.
|
||||||
|
|
@ -534,7 +522,7 @@ class Agents(Protocol):
|
||||||
:param messages: List of messages to start the turn with.
|
:param messages: List of messages to start the turn with.
|
||||||
:param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
|
:param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
|
||||||
:param documents: (Optional) List of documents to create the turn with.
|
:param documents: (Optional) List of documents to create the turn with.
|
||||||
:param toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request.
|
:param tools: (Optional) List of tools to create the turn with, will be used in addition to the agent's config tools for the request.
|
||||||
:param tool_config: (Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config.
|
:param tool_config: (Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config.
|
||||||
:returns: If stream=False, returns a Turn object.
|
:returns: If stream=False, returns a Turn object.
|
||||||
If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk.
|
If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk.
|
||||||
|
|
|
||||||
|
|
@ -100,8 +100,6 @@ def convert_to_pydantic(annotation: Any, value: Any) -> Any:
|
||||||
return TypeAdapter(annotation).validate_python(value)
|
return TypeAdapter(annotation).validate_python(value)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# TODO: this is workardound for having Union[str, AgentToolGroup] in API schema.
|
|
||||||
# We should get rid of any non-discriminated unions in the API schema.
|
|
||||||
if origin is Union:
|
if origin is Union:
|
||||||
for union_type in get_args(annotation):
|
for union_type in get_args(annotation):
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -10,16 +10,13 @@ import re
|
||||||
import uuid
|
import uuid
|
||||||
import warnings
|
import warnings
|
||||||
from collections.abc import AsyncGenerator
|
from collections.abc import AsyncGenerator
|
||||||
from typing import Any
|
|
||||||
from datetime import UTC, datetime
|
from datetime import UTC, datetime
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from llama_stack.apis.agents import (
|
from llama_stack.apis.agents import (
|
||||||
AgentConfig,
|
AgentConfig,
|
||||||
OpenAIResponseInputTool,
|
|
||||||
AgentToolGroup,
|
|
||||||
AgentToolGroupWithArgs,
|
|
||||||
AgentTurnCreateRequest,
|
AgentTurnCreateRequest,
|
||||||
AgentTurnResponseEvent,
|
AgentTurnResponseEvent,
|
||||||
AgentTurnResponseEventType,
|
AgentTurnResponseEventType,
|
||||||
|
|
@ -33,12 +30,19 @@ from llama_stack.apis.agents import (
|
||||||
Attachment,
|
Attachment,
|
||||||
Document,
|
Document,
|
||||||
InferenceStep,
|
InferenceStep,
|
||||||
|
OpenAIResponseInputTool,
|
||||||
ShieldCallStep,
|
ShieldCallStep,
|
||||||
Step,
|
Step,
|
||||||
StepType,
|
StepType,
|
||||||
ToolExecutionStep,
|
ToolExecutionStep,
|
||||||
Turn,
|
Turn,
|
||||||
)
|
)
|
||||||
|
from llama_stack.apis.agents.openai_responses import (
|
||||||
|
OpenAIResponseInputToolFileSearch,
|
||||||
|
OpenAIResponseInputToolFunction,
|
||||||
|
OpenAIResponseInputToolMCP,
|
||||||
|
OpenAIResponseInputToolWebSearch,
|
||||||
|
)
|
||||||
from llama_stack.apis.common.content_types import URL, ToolCallDelta, ToolCallParseStatus
|
from llama_stack.apis.common.content_types import URL, ToolCallDelta, ToolCallParseStatus
|
||||||
from llama_stack.apis.common.errors import SessionNotFoundError
|
from llama_stack.apis.common.errors import SessionNotFoundError
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
|
|
@ -47,13 +51,12 @@ from llama_stack.apis.inference import (
|
||||||
Inference,
|
Inference,
|
||||||
Message,
|
Message,
|
||||||
OpenAIAssistantMessageParam,
|
OpenAIAssistantMessageParam,
|
||||||
OpenAIDeveloperMessageParam,
|
|
||||||
OpenAIChatCompletionContentPartImageParam,
|
OpenAIChatCompletionContentPartImageParam,
|
||||||
OpenAIChatCompletionContentPartTextParam,
|
OpenAIChatCompletionContentPartTextParam,
|
||||||
OpenAIChatCompletionMessageContent,
|
OpenAIChatCompletionMessageContent,
|
||||||
OpenAIChatCompletionToolCall,
|
OpenAIChatCompletionToolCall,
|
||||||
OpenAIChatCompletionToolCallFunction,
|
OpenAIChatCompletionToolCallFunction,
|
||||||
OpenAIImageURL,
|
OpenAIDeveloperMessageParam,
|
||||||
OpenAIMessageParam,
|
OpenAIMessageParam,
|
||||||
OpenAISystemMessageParam,
|
OpenAISystemMessageParam,
|
||||||
OpenAIToolMessageParam,
|
OpenAIToolMessageParam,
|
||||||
|
|
@ -123,7 +126,9 @@ def _openai_tool_call_to_legacy(tool_call: OpenAIChatCompletionToolCall) -> Tool
|
||||||
|
|
||||||
|
|
||||||
def _legacy_tool_call_to_openai(tool_call: ToolCall, index: int | None = None) -> OpenAIChatCompletionToolCall:
|
def _legacy_tool_call_to_openai(tool_call: ToolCall, index: int | None = None) -> OpenAIChatCompletionToolCall:
|
||||||
function_name = tool_call.tool_name if not isinstance(tool_call.tool_name, BuiltinTool) else tool_call.tool_name.value
|
function_name = (
|
||||||
|
tool_call.tool_name if not isinstance(tool_call.tool_name, BuiltinTool) else tool_call.tool_name.value
|
||||||
|
)
|
||||||
return OpenAIChatCompletionToolCall(
|
return OpenAIChatCompletionToolCall(
|
||||||
index=index,
|
index=index,
|
||||||
id=tool_call.call_id,
|
id=tool_call.call_id,
|
||||||
|
|
@ -178,9 +183,9 @@ def _coerce_to_text(content: Any) -> str:
|
||||||
if isinstance(content, list):
|
if isinstance(content, list):
|
||||||
return "\n".join(_coerce_to_text(item) for item in content)
|
return "\n".join(_coerce_to_text(item) for item in content)
|
||||||
if hasattr(content, "text"):
|
if hasattr(content, "text"):
|
||||||
return getattr(content, "text")
|
return content.text
|
||||||
if hasattr(content, "image"):
|
if hasattr(content, "image"):
|
||||||
image = getattr(content, "image")
|
image = content.image
|
||||||
if hasattr(image, "url") and image.url:
|
if hasattr(image, "url") and image.url:
|
||||||
return getattr(image.url, "uri", "")
|
return getattr(image.url, "uri", "")
|
||||||
return str(content)
|
return str(content)
|
||||||
|
|
@ -200,10 +205,7 @@ def _openai_message_param_to_legacy(message: OpenAIMessageParam) -> Message:
|
||||||
# Map developer messages to user role for legacy compatibility
|
# Map developer messages to user role for legacy compatibility
|
||||||
return UserMessage(content=_openai_message_content_to_text(message.content))
|
return UserMessage(content=_openai_message_content_to_text(message.content))
|
||||||
if isinstance(message, OpenAIAssistantMessageParam):
|
if isinstance(message, OpenAIAssistantMessageParam):
|
||||||
tool_calls = [
|
tool_calls = [_openai_tool_call_to_legacy(tool_call) for tool_call in message.tool_calls or []]
|
||||||
_openai_tool_call_to_legacy(tool_call)
|
|
||||||
for tool_call in message.tool_calls or []
|
|
||||||
]
|
|
||||||
return CompletionMessage(
|
return CompletionMessage(
|
||||||
content=_openai_message_content_to_text(message.content) if message.content is not None else "",
|
content=_openai_message_content_to_text(message.content) if message.content is not None else "",
|
||||||
stop_reason=StopReason.end_of_turn,
|
stop_reason=StopReason.end_of_turn,
|
||||||
|
|
@ -279,6 +281,10 @@ class ChatAgent(ShieldRunnerMixin):
|
||||||
self.created_at = created_at
|
self.created_at = created_at
|
||||||
self.telemetry_enabled = telemetry_enabled
|
self.telemetry_enabled = telemetry_enabled
|
||||||
|
|
||||||
|
self.tool_defs: list[ToolDefinition] = []
|
||||||
|
self.tool_name_to_args: dict[str | BuiltinTool, dict[str, Any]] = {}
|
||||||
|
self.client_tools_config: list[OpenAIResponseInputTool | ToolDef] = []
|
||||||
|
|
||||||
ShieldRunnerMixin.__init__(
|
ShieldRunnerMixin.__init__(
|
||||||
self,
|
self,
|
||||||
safety_api,
|
safety_api,
|
||||||
|
|
@ -367,7 +373,7 @@ class ChatAgent(ShieldRunnerMixin):
|
||||||
if self.agent_config.name:
|
if self.agent_config.name:
|
||||||
span.set_attribute("agent_name", self.agent_config.name)
|
span.set_attribute("agent_name", self.agent_config.name)
|
||||||
|
|
||||||
await self._initialize_tools(request.toolgroups)
|
await self._initialize_tools(request.tools)
|
||||||
async for chunk in self._run_turn(request, turn_id):
|
async for chunk in self._run_turn(request, turn_id):
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
|
|
@ -682,12 +688,11 @@ class ChatAgent(ShieldRunnerMixin):
|
||||||
|
|
||||||
# Build a map of custom tools to their definitions for faster lookup
|
# Build a map of custom tools to their definitions for faster lookup
|
||||||
client_tools: dict[str, OpenAIResponseInputTool | ToolDef] = {}
|
client_tools: dict[str, OpenAIResponseInputTool | ToolDef] = {}
|
||||||
if self.agent_config.client_tools:
|
for tool in self.client_tools_config or []:
|
||||||
for tool in self.agent_config.client_tools:
|
if isinstance(tool, ToolDef) and tool.name:
|
||||||
if isinstance(tool, ToolDef) and tool.name:
|
client_tools[tool.name] = tool
|
||||||
client_tools[tool.name] = tool
|
elif getattr(tool, "type", None) == "function" and getattr(tool, "name", None):
|
||||||
elif getattr(tool, "type", None) == "function" and getattr(tool, "name", None):
|
client_tools[tool.name] = tool
|
||||||
client_tools[tool.name] = tool
|
|
||||||
while True:
|
while True:
|
||||||
step_id = str(uuid.uuid4())
|
step_id = str(uuid.uuid4())
|
||||||
inference_start_time = datetime.now(UTC).isoformat()
|
inference_start_time = datetime.now(UTC).isoformat()
|
||||||
|
|
@ -987,91 +992,124 @@ class ChatAgent(ShieldRunnerMixin):
|
||||||
|
|
||||||
async def _initialize_tools(
|
async def _initialize_tools(
|
||||||
self,
|
self,
|
||||||
toolgroups_for_turn: list[AgentToolGroup] | None = None,
|
tools_for_turn: list[OpenAIResponseInputTool] | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
toolgroup_to_args = {}
|
|
||||||
for toolgroup in (self.agent_config.toolgroups or []) + (toolgroups_for_turn or []):
|
|
||||||
if isinstance(toolgroup, AgentToolGroupWithArgs):
|
|
||||||
tool_group_name, _ = self._parse_toolgroup_name(toolgroup.name)
|
|
||||||
toolgroup_to_args[tool_group_name] = toolgroup.args
|
|
||||||
|
|
||||||
# Determine which tools to include
|
|
||||||
tool_groups_to_include = toolgroups_for_turn or self.agent_config.toolgroups or []
|
|
||||||
agent_config_toolgroups = []
|
|
||||||
for toolgroup in tool_groups_to_include:
|
|
||||||
name = toolgroup.name if isinstance(toolgroup, AgentToolGroupWithArgs) else toolgroup
|
|
||||||
if name not in agent_config_toolgroups:
|
|
||||||
agent_config_toolgroups.append(name)
|
|
||||||
|
|
||||||
toolgroup_to_args = toolgroup_to_args or {}
|
|
||||||
|
|
||||||
tool_name_to_def: dict[str | BuiltinTool, ToolDefinition] = {}
|
tool_name_to_def: dict[str | BuiltinTool, ToolDefinition] = {}
|
||||||
tool_name_to_args: dict[str | BuiltinTool, dict[str, Any]] = {}
|
tool_name_to_args: dict[str | BuiltinTool, dict[str, Any]] = {}
|
||||||
|
client_tools_map: dict[str, OpenAIResponseInputTool | ToolDef] = {}
|
||||||
|
|
||||||
|
def add_tool_definition(identifier: str | BuiltinTool, tool_definition: ToolDefinition) -> None:
|
||||||
|
if identifier in tool_name_to_def:
|
||||||
|
raise ValueError(f"Tool {identifier} already exists")
|
||||||
|
tool_name_to_def[identifier] = tool_definition
|
||||||
|
|
||||||
|
def add_client_tool(tool: OpenAIResponseInputTool | ToolDef) -> None:
|
||||||
|
name = getattr(tool, "name", None)
|
||||||
|
if isinstance(tool, ToolDef):
|
||||||
|
name = tool.name
|
||||||
|
if not name:
|
||||||
|
raise ValueError("Client tools must have a name")
|
||||||
|
if name not in client_tools_map:
|
||||||
|
client_tools_map[name] = tool
|
||||||
|
tool_definition = _client_tool_to_tool_definition(tool)
|
||||||
|
add_tool_definition(tool_definition.tool_name, tool_definition)
|
||||||
|
|
||||||
if self.agent_config.client_tools:
|
if self.agent_config.client_tools:
|
||||||
for tool in self.agent_config.client_tools:
|
for tool in self.agent_config.client_tools:
|
||||||
tool_definition = _client_tool_to_tool_definition(tool)
|
add_client_tool(tool)
|
||||||
if tool_name_to_def.get(tool_definition.tool_name):
|
|
||||||
raise ValueError(f"Tool {tool_definition.tool_name} already exists")
|
|
||||||
tool_name_to_def[tool_definition.tool_name] = tool_definition
|
|
||||||
for toolgroup_name_with_maybe_tool_name in agent_config_toolgroups:
|
|
||||||
toolgroup_name, input_tool_name = self._parse_toolgroup_name(toolgroup_name_with_maybe_tool_name)
|
|
||||||
tools = await self.tool_groups_api.list_tools(toolgroup_id=toolgroup_name)
|
|
||||||
if not tools.data:
|
|
||||||
available_tool_groups = ", ".join(
|
|
||||||
[t.identifier for t in (await self.tool_groups_api.list_tool_groups()).data]
|
|
||||||
)
|
|
||||||
raise ValueError(f"Toolgroup {toolgroup_name} not found, available toolgroups: {available_tool_groups}")
|
|
||||||
if input_tool_name is not None and not any(tool.name == input_tool_name for tool in tools.data):
|
|
||||||
raise ValueError(
|
|
||||||
f"Tool {input_tool_name} not found in toolgroup {toolgroup_name}. Available tools: {', '.join([tool.name for tool in tools.data])}"
|
|
||||||
)
|
|
||||||
|
|
||||||
for tool_def in tools.data:
|
effective_tools = tools_for_turn
|
||||||
if toolgroup_name.startswith("builtin") and toolgroup_name != RAG_TOOL_GROUP:
|
if effective_tools is None:
|
||||||
identifier: str | BuiltinTool | None = tool_def.name
|
effective_tools = self.agent_config.tools
|
||||||
if identifier == "web_search":
|
|
||||||
identifier = BuiltinTool.brave_search
|
|
||||||
else:
|
|
||||||
identifier = BuiltinTool(identifier)
|
|
||||||
else:
|
|
||||||
# add if tool_name is unspecified or the tool_def identifier is the same as the tool_name
|
|
||||||
if input_tool_name in (None, tool_def.name):
|
|
||||||
identifier = tool_def.name
|
|
||||||
else:
|
|
||||||
identifier = None
|
|
||||||
|
|
||||||
if tool_name_to_def.get(identifier, None):
|
for tool in effective_tools or []:
|
||||||
raise ValueError(f"Tool {identifier} already exists")
|
if isinstance(tool, OpenAIResponseInputToolFunction):
|
||||||
if identifier:
|
add_client_tool(tool)
|
||||||
tool_name_to_def[identifier] = ToolDefinition(
|
continue
|
||||||
|
|
||||||
|
resolved_tools = await self._resolve_response_tool(tool)
|
||||||
|
for identifier, definition, args in resolved_tools:
|
||||||
|
add_tool_definition(identifier, definition)
|
||||||
|
if args:
|
||||||
|
existing_args = tool_name_to_args.get(identifier, {})
|
||||||
|
tool_name_to_args[identifier] = {**existing_args, **args}
|
||||||
|
|
||||||
|
self.tool_defs = list(tool_name_to_def.values())
|
||||||
|
self.tool_name_to_args = tool_name_to_args
|
||||||
|
self.client_tools_config = list(client_tools_map.values())
|
||||||
|
|
||||||
|
async def _resolve_response_tool(
|
||||||
|
self,
|
||||||
|
tool: OpenAIResponseInputTool,
|
||||||
|
) -> list[tuple[str | BuiltinTool, ToolDefinition, dict[str, Any]]]:
|
||||||
|
if isinstance(tool, OpenAIResponseInputToolWebSearch):
|
||||||
|
tool_def = await self.tool_groups_api.get_tool(WEB_SEARCH_TOOL)
|
||||||
|
if tool_def is None:
|
||||||
|
raise ValueError("web_search tool is not registered")
|
||||||
|
identifier: str | BuiltinTool = BuiltinTool.brave_search
|
||||||
|
return [
|
||||||
|
(
|
||||||
|
identifier,
|
||||||
|
ToolDefinition(
|
||||||
tool_name=identifier,
|
tool_name=identifier,
|
||||||
description=tool_def.description,
|
description=tool_def.description,
|
||||||
input_schema=tool_def.input_schema,
|
input_schema=tool_def.input_schema,
|
||||||
|
),
|
||||||
|
{},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
if isinstance(tool, OpenAIResponseInputToolFileSearch):
|
||||||
|
tool_def = await self.tool_groups_api.get_tool(MEMORY_QUERY_TOOL)
|
||||||
|
if tool_def is None:
|
||||||
|
raise ValueError("knowledge_search tool is not registered")
|
||||||
|
args: dict[str, Any] = {
|
||||||
|
"vector_db_ids": tool.vector_store_ids,
|
||||||
|
}
|
||||||
|
if tool.filters is not None:
|
||||||
|
args["filters"] = tool.filters
|
||||||
|
if tool.max_num_results is not None:
|
||||||
|
args["max_num_results"] = tool.max_num_results
|
||||||
|
if tool.ranking_options is not None:
|
||||||
|
args["ranking_options"] = tool.ranking_options.model_dump()
|
||||||
|
|
||||||
|
return [
|
||||||
|
(
|
||||||
|
tool_def.name,
|
||||||
|
ToolDefinition(
|
||||||
|
tool_name=tool_def.name,
|
||||||
|
description=tool_def.description,
|
||||||
|
input_schema=tool_def.input_schema,
|
||||||
|
),
|
||||||
|
args,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
if isinstance(tool, OpenAIResponseInputToolMCP):
|
||||||
|
toolgroup_id = tool.server_label
|
||||||
|
if not toolgroup_id.startswith("mcp::"):
|
||||||
|
toolgroup_id = f"mcp::{toolgroup_id}"
|
||||||
|
tools = await self.tool_groups_api.list_tools(toolgroup_id=toolgroup_id)
|
||||||
|
if not tools.data:
|
||||||
|
raise ValueError(
|
||||||
|
f"No tools registered for MCP server '{tool.server_label}'. Ensure the toolgroup '{toolgroup_id}' is registered."
|
||||||
|
)
|
||||||
|
resolved: list[tuple[str | BuiltinTool, ToolDefinition, dict[str, Any]]] = []
|
||||||
|
for tool_def in tools.data:
|
||||||
|
resolved.append(
|
||||||
|
(
|
||||||
|
tool_def.name,
|
||||||
|
ToolDefinition(
|
||||||
|
tool_name=tool_def.name,
|
||||||
|
description=tool_def.description,
|
||||||
|
input_schema=tool_def.input_schema,
|
||||||
|
),
|
||||||
|
{},
|
||||||
)
|
)
|
||||||
tool_name_to_args[identifier] = toolgroup_to_args.get(toolgroup_name, {})
|
)
|
||||||
|
return resolved
|
||||||
|
|
||||||
self.tool_defs, self.tool_name_to_args = (
|
raise ValueError(f"Unsupported tool type '{getattr(tool, 'type', None)}' in agent configuration")
|
||||||
list(tool_name_to_def.values()),
|
|
||||||
tool_name_to_args,
|
|
||||||
)
|
|
||||||
|
|
||||||
def _parse_toolgroup_name(self, toolgroup_name_with_maybe_tool_name: str) -> tuple[str, str | None]:
|
|
||||||
"""Parse a toolgroup name into its components.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
toolgroup_name: The toolgroup name to parse (e.g. "builtin::rag/knowledge_search")
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A tuple of (tool_type, tool_group, tool_name)
|
|
||||||
"""
|
|
||||||
split_names = toolgroup_name_with_maybe_tool_name.split("/")
|
|
||||||
if len(split_names) == 2:
|
|
||||||
# e.g. "builtin::rag"
|
|
||||||
tool_group, tool_name = split_names
|
|
||||||
else:
|
|
||||||
tool_group, tool_name = split_names[0], None
|
|
||||||
return tool_group, tool_name
|
|
||||||
|
|
||||||
async def execute_tool_call_maybe(
|
async def execute_tool_call_maybe(
|
||||||
self,
|
self,
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,6 @@ from llama_stack.apis.agents import (
|
||||||
Agents,
|
Agents,
|
||||||
AgentSessionCreateResponse,
|
AgentSessionCreateResponse,
|
||||||
AgentStepResponse,
|
AgentStepResponse,
|
||||||
AgentToolGroup,
|
|
||||||
AgentTurnCreateRequest,
|
AgentTurnCreateRequest,
|
||||||
AgentTurnResumeRequest,
|
AgentTurnResumeRequest,
|
||||||
Document,
|
Document,
|
||||||
|
|
@ -32,9 +31,9 @@ from llama_stack.apis.agents.openai_responses import OpenAIResponseText
|
||||||
from llama_stack.apis.common.responses import PaginatedResponse
|
from llama_stack.apis.common.responses import PaginatedResponse
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
Inference,
|
Inference,
|
||||||
ToolConfig,
|
|
||||||
OpenAIMessageParam,
|
OpenAIMessageParam,
|
||||||
OpenAIToolMessageParam,
|
OpenAIToolMessageParam,
|
||||||
|
ToolConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.apis.safety import Safety
|
from llama_stack.apis.safety import Safety
|
||||||
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
||||||
|
|
@ -156,7 +155,7 @@ class MetaReferenceAgentsImpl(Agents):
|
||||||
agent_id: str,
|
agent_id: str,
|
||||||
session_id: str,
|
session_id: str,
|
||||||
messages: list[OpenAIMessageParam],
|
messages: list[OpenAIMessageParam],
|
||||||
toolgroups: list[AgentToolGroup] | None = None,
|
tools: list[OpenAIResponseInputTool] | None = None,
|
||||||
documents: list[Document] | None = None,
|
documents: list[Document] | None = None,
|
||||||
stream: bool | None = False,
|
stream: bool | None = False,
|
||||||
tool_config: ToolConfig | None = None,
|
tool_config: ToolConfig | None = None,
|
||||||
|
|
@ -166,7 +165,7 @@ class MetaReferenceAgentsImpl(Agents):
|
||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
stream=True,
|
stream=True,
|
||||||
toolgroups=toolgroups,
|
tools=tools,
|
||||||
documents=documents,
|
documents=documents,
|
||||||
tool_config=tool_config,
|
tool_config=tool_config,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -12,12 +12,8 @@ import requests
|
||||||
from llama_stack_client import Agent, AgentEventLogger, Document
|
from llama_stack_client import Agent, AgentEventLogger, Document
|
||||||
from llama_stack_client.types.shared_params.agent_config import AgentConfig, ToolConfig
|
from llama_stack_client.types.shared_params.agent_config import AgentConfig, ToolConfig
|
||||||
|
|
||||||
from llama_stack.apis.agents.agents import (
|
from llama_stack.apis.agents.agents import AgentConfig as Server__AgentConfig
|
||||||
AgentConfig as Server__AgentConfig,
|
from llama_stack.apis.inference import ToolChoice
|
||||||
)
|
|
||||||
from llama_stack.apis.agents.agents import (
|
|
||||||
ToolChoice,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_boiling_point(liquid_name: str, celcius: bool = True) -> int:
|
def get_boiling_point(liquid_name: str, celcius: bool = True) -> int:
|
||||||
|
|
@ -187,7 +183,7 @@ def test_tool_config(agent_config):
|
||||||
temperature=1.0,
|
temperature=1.0,
|
||||||
top_p=0.9,
|
top_p=0.9,
|
||||||
max_output_tokens=512,
|
max_output_tokens=512,
|
||||||
toolgroups=[],
|
tools=[],
|
||||||
enable_session_persistence=False,
|
enable_session_persistence=False,
|
||||||
)
|
)
|
||||||
agent_config = AgentConfig(
|
agent_config = AgentConfig(
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,13 @@ def sample_agent_config():
|
||||||
max_output_tokens=0,
|
max_output_tokens=0,
|
||||||
input_shields=["string"],
|
input_shields=["string"],
|
||||||
output_shields=["string"],
|
output_shields=["string"],
|
||||||
toolgroups=["mcp::my_mcp_server"],
|
tools=[
|
||||||
|
{
|
||||||
|
"type": "mcp",
|
||||||
|
"server_label": "my_mcp_server",
|
||||||
|
"server_url": "http://example.com/mcp",
|
||||||
|
}
|
||||||
|
],
|
||||||
client_tools=[
|
client_tools=[
|
||||||
{
|
{
|
||||||
"type": "function",
|
"type": "function",
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue