feat: function tools in OpenAI Responses (#2094)

# What does this PR do?

This is a combination of what were previously 3 separate PRs - #2069,
#2075, and #2083. It turns out all 3 of them are needed to land a
working function-calling Responses implementation. The web search
builtin tool was already working, but this wires in support for custom
function calling.

I ended up combining all three into one PR because they had lots of
merge conflicts, both with each other and with #1806, which just
landed, and because landing any of them individually would have left
only a partially working implementation merged.

The new things added here are:
* Storing of input items from previous responses and restoring of those
input items when adding previous responses to the conversation state
* Handling of multiple input item message roles, not just "user"
messages.
* Support for custom tools passed into the Responses API, enabling
function calling beyond just the builtin web search tool (a request
sketch follows this list).
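
For illustration, calling the new function tool path looks roughly like
the sketch below. This is a hedged sketch, not documentation: the
`base_url`, `api_key`, and model name are placeholder assumptions for
your own running stack, while the tool definition mirrors the
`sf_weather` case added to the verification tests in this PR.

```
from openai import OpenAI

# base_url, api_key, and model are assumptions; point these at your own stack.
client = OpenAI(base_url="http://localhost:8321/openai/v1", api_key="none")
model = "meta-llama/Llama-3.1-8B-Instruct"

# Flattened function tool definition, mirroring the "sf_weather" verification case.
tools = [
    {
        "type": "function",
        "name": "get_weather",
        "description": "Get current temperature for a given location.",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "City and country e.g. Bogota, Colombia",
                },
            },
            "required": ["location"],
            "additionalProperties": False,
        },
    }
]

response = client.responses.create(
    model=model,
    input="What's the weather like in San Francisco?",
    tools=tools,
    stream=False,
)

# With a function tool, the output is a function_call item rather than a message.
assert response.output[0].type == "function_call"
print(response.output[0].name, response.output[0].arguments)
```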

Closes #2074
Closes #2080

## Test Plan

### Unit Tests

Several new unit tests were added, and they all pass. Ran via:

```
python -m pytest -s -v tests/unit/providers/agents/meta_reference/test_openai_responses.py
```

### Responses API Verification Tests

I ran our verification run.yaml against multiple providers to ensure we
were getting a decent pass rate. Specifically, I ensured the new custom
tool verification test passed across multiple providers, and that the
multi-turn examples passed on at least some of them (some providers
still struggle with multi-turn workflows); the round trip those
multi-turn tests exercise is sketched below.
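
A minimal sketch of that multi-turn round trip, reusing the `client`,
`model`, and `tools` assumptions from the sketch above; the weather
result here is a stand-in for actually executing the function:

```
# First turn: the model asks us to call get_weather.
first = client.responses.create(
    model=model,
    input="What's the weather like in San Francisco?",
    tools=tools,
)
call = first.output[0]  # a function_call output item

# Execute the function ourselves; this result is a stand-in.
result = "68F and sunny"

# Second turn: send the result back as a function_call_output input item,
# chained to the first response via previous_response_id.
second = client.responses.create(
    model=model,
    previous_response_id=first.id,
    input=[
        {
            "type": "function_call_output",
            "call_id": call.call_id,
            "output": result,
        }
    ],
)
print(second.output_text)
```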

Running the stack setup for verification testing:

```
llama stack run --image-type venv tests/verifications/openai-api-verification-run.yaml
```

Together, for example, passes 100%:

```
pytest -s -v 'tests/verifications/openai_api/test_responses.py' --provider=together-llama-stack
```

## Documentation

We will need to start documenting the OpenAI APIs, but the Responses
implementation is still evolving rapidly, so that work is deferred for now.

---------

Signed-off-by: Derek Higgins <derekh@redhat.com>
Signed-off-by: Ben Browning <bbrownin@redhat.com>
Co-authored-by: Derek Higgins <derekh@redhat.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Ben Browning 2025-05-13 14:29:15 -04:00 committed by GitHub
parent e0d10dd0b1
commit 8e316c9b1e
13 changed files with 1099 additions and 370 deletions


@@ -6466,54 +6466,51 @@
],
"title": "AgentTurnResponseTurnStartPayload"
},
"OpenAIResponseInputMessage": {
"OpenAIResponseInput": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
},
{
"$ref": "#/components/schemas/OpenAIResponseMessage"
}
]
},
"OpenAIResponseInputFunctionToolCallOutput": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"call_id": {
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
}
}
]
},
"role": {
"oneOf": [
{
"type": "string",
"const": "system"
},
{
"type": "string",
"const": "developer"
},
{
"type": "string",
"const": "user"
},
{
"type": "string",
"const": "assistant"
}
]
"output": {
"type": "string"
},
"type": {
"type": "string",
"const": "message",
"default": "message"
"const": "function_call_output",
"default": "function_call_output"
},
"id": {
"type": "string"
},
"status": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"content",
"role"
"call_id",
"output",
"type"
],
"title": "OpenAIResponseInputMessage"
"title": "OpenAIResponseInputFunctionToolCallOutput",
"description": "This represents the output of a function call that gets passed back to the model."
},
"OpenAIResponseInputMessageContent": {
"oneOf": [
@@ -6588,6 +6585,113 @@
"title": "OpenAIResponseInputMessageContentText"
},
"OpenAIResponseInputTool": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseInputToolWebSearch"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputToolFileSearch"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputToolFunction"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"web_search": "#/components/schemas/OpenAIResponseInputToolWebSearch",
"file_search": "#/components/schemas/OpenAIResponseInputToolFileSearch",
"function": "#/components/schemas/OpenAIResponseInputToolFunction"
}
}
},
"OpenAIResponseInputToolFileSearch": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "file_search",
"default": "file_search"
},
"vector_store_id": {
"type": "array",
"items": {
"type": "string"
}
},
"ranking_options": {
"type": "object",
"properties": {
"ranker": {
"type": "string"
},
"score_threshold": {
"type": "number",
"default": 0.0
}
},
"additionalProperties": false,
"title": "FileSearchRankingOptions"
}
},
"additionalProperties": false,
"required": [
"type",
"vector_store_id"
],
"title": "OpenAIResponseInputToolFileSearch"
},
"OpenAIResponseInputToolFunction": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "function",
"default": "function"
},
"name": {
"type": "string"
},
"description": {
"type": "string"
},
"parameters": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"strict": {
"type": "boolean"
}
},
"additionalProperties": false,
"required": [
"type",
"name"
],
"title": "OpenAIResponseInputToolFunction"
},
"OpenAIResponseInputToolWebSearch": {
"type": "object",
"properties": {
"type": {
@@ -6614,6 +6718,146 @@
],
"title": "OpenAIResponseInputToolWebSearch"
},
"OpenAIResponseMessage": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
}
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutputMessageContent"
}
}
]
},
"role": {
"oneOf": [
{
"type": "string",
"const": "system"
},
{
"type": "string",
"const": "developer"
},
{
"type": "string",
"const": "user"
},
{
"type": "string",
"const": "assistant"
}
]
},
"type": {
"type": "string",
"const": "message",
"default": "message"
},
"id": {
"type": "string"
},
"status": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"content",
"role",
"type"
],
"title": "OpenAIResponseMessage",
"description": "Corresponds to the various Message types in the Responses API. They are all under one type because the Responses API gives them all the same \"type\" value, and there is no way to tell them apart in certain scenarios."
},
"OpenAIResponseOutputMessageContent": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"type": {
"type": "string",
"const": "output_text",
"default": "output_text"
}
},
"additionalProperties": false,
"required": [
"text",
"type"
],
"title": "OpenAIResponseOutputMessageContentOutputText"
},
"OpenAIResponseOutputMessageFunctionToolCall": {
"type": "object",
"properties": {
"arguments": {
"type": "string"
},
"call_id": {
"type": "string"
},
"name": {
"type": "string"
},
"type": {
"type": "string",
"const": "function_call",
"default": "function_call"
},
"id": {
"type": "string"
},
"status": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"arguments",
"call_id",
"name",
"type",
"id",
"status"
],
"title": "OpenAIResponseOutputMessageFunctionToolCall"
},
"OpenAIResponseOutputMessageWebSearchToolCall": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "web_search_call",
"default": "web_search_call"
}
},
"additionalProperties": false,
"required": [
"id",
"status",
"type"
],
"title": "OpenAIResponseOutputMessageWebSearchToolCall"
},
"CreateOpenaiResponseRequest": {
"type": "object",
"properties": {
@@ -6625,7 +6869,7 @@
{
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInputMessage"
"$ref": "#/components/schemas/OpenAIResponseInput"
}
}
],
@@ -6743,98 +6987,24 @@
"OpenAIResponseOutput": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessage"
"$ref": "#/components/schemas/OpenAIResponseMessage"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"message": "#/components/schemas/OpenAIResponseOutputMessage",
"web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
"message": "#/components/schemas/OpenAIResponseMessage",
"web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall",
"function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
}
}
},
"OpenAIResponseOutputMessage": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"content": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutputMessageContent"
}
},
"role": {
"type": "string",
"const": "assistant",
"default": "assistant"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "message",
"default": "message"
}
},
"additionalProperties": false,
"required": [
"id",
"content",
"role",
"status",
"type"
],
"title": "OpenAIResponseOutputMessage"
},
"OpenAIResponseOutputMessageContent": {
"type": "object",
"properties": {
"text": {
"type": "string"
},
"type": {
"type": "string",
"const": "output_text",
"default": "output_text"
}
},
"additionalProperties": false,
"required": [
"text",
"type"
],
"title": "OpenAIResponseOutputMessageContentOutputText"
},
"OpenAIResponseOutputMessageWebSearchToolCall": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"status": {
"type": "string"
},
"type": {
"type": "string",
"const": "web_search_call",
"default": "web_search_call"
}
},
"additionalProperties": false,
"required": [
"id",
"status",
"type"
],
"title": "OpenAIResponseOutputMessageWebSearchToolCall"
},
"OpenAIResponseObjectStream": {
"oneOf": [
{


@@ -4534,34 +4534,37 @@ components:
- event_type
- turn_id
title: AgentTurnResponseTurnStartPayload
OpenAIResponseInputMessage:
OpenAIResponseInput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
- $ref: '#/components/schemas/OpenAIResponseMessage'
"OpenAIResponseInputFunctionToolCallOutput":
type: object
properties:
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
role:
oneOf:
- type: string
const: system
- type: string
const: developer
- type: string
const: user
- type: string
const: assistant
call_id:
type: string
output:
type: string
type:
type: string
const: message
default: message
const: function_call_output
default: function_call_output
id:
type: string
status:
type: string
additionalProperties: false
required:
- content
- role
title: OpenAIResponseInputMessage
- call_id
- output
- type
title: >-
OpenAIResponseInputFunctionToolCallOutput
description: >-
This represents the output of a function call that gets passed back to the
model.
OpenAIResponseInputMessageContent:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
@@ -4609,6 +4612,71 @@ components:
- type
title: OpenAIResponseInputMessageContentText
OpenAIResponseInputTool:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
- $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
- $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
discriminator:
propertyName: type
mapping:
web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
function: '#/components/schemas/OpenAIResponseInputToolFunction'
OpenAIResponseInputToolFileSearch:
type: object
properties:
type:
type: string
const: file_search
default: file_search
vector_store_id:
type: array
items:
type: string
ranking_options:
type: object
properties:
ranker:
type: string
score_threshold:
type: number
default: 0.0
additionalProperties: false
title: FileSearchRankingOptions
additionalProperties: false
required:
- type
- vector_store_id
title: OpenAIResponseInputToolFileSearch
OpenAIResponseInputToolFunction:
type: object
properties:
type:
type: string
const: function
default: function
name:
type: string
description:
type: string
parameters:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
strict:
type: boolean
additionalProperties: false
required:
- type
- name
title: OpenAIResponseInputToolFunction
OpenAIResponseInputToolWebSearch:
type: object
properties:
type:
@@ -4625,6 +4693,106 @@ components:
required:
- type
title: OpenAIResponseInputToolWebSearch
OpenAIResponseMessage:
type: object
properties:
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutputMessageContent'
role:
oneOf:
- type: string
const: system
- type: string
const: developer
- type: string
const: user
- type: string
const: assistant
type:
type: string
const: message
default: message
id:
type: string
status:
type: string
additionalProperties: false
required:
- content
- role
- type
title: OpenAIResponseMessage
description: >-
Corresponds to the various Message types in the Responses API. They are all
under one type because the Responses API gives them all the same "type" value,
and there is no way to tell them apart in certain scenarios.
OpenAIResponseOutputMessageContent:
type: object
properties:
text:
type: string
type:
type: string
const: output_text
default: output_text
additionalProperties: false
required:
- text
- type
title: >-
OpenAIResponseOutputMessageContentOutputText
"OpenAIResponseOutputMessageFunctionToolCall":
type: object
properties:
arguments:
type: string
call_id:
type: string
name:
type: string
type:
type: string
const: function_call
default: function_call
id:
type: string
status:
type: string
additionalProperties: false
required:
- arguments
- call_id
- name
- type
- id
- status
title: >-
OpenAIResponseOutputMessageFunctionToolCall
"OpenAIResponseOutputMessageWebSearchToolCall":
type: object
properties:
id:
type: string
status:
type: string
type:
type: string
const: web_search_call
default: web_search_call
additionalProperties: false
required:
- id
- status
- type
title: >-
OpenAIResponseOutputMessageWebSearchToolCall
CreateOpenaiResponseRequest:
type: object
properties:
@@ -4633,7 +4801,7 @@ components:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIResponseInputMessage'
$ref: '#/components/schemas/OpenAIResponseInput'
description: Input message(s) to create the response.
model:
type: string
@@ -4717,73 +4885,15 @@ components:
title: OpenAIResponseObject
OpenAIResponseOutput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessage'
- $ref: '#/components/schemas/OpenAIResponseMessage'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
discriminator:
propertyName: type
mapping:
message: '#/components/schemas/OpenAIResponseOutputMessage'
message: '#/components/schemas/OpenAIResponseMessage'
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
OpenAIResponseOutputMessage:
type: object
properties:
id:
type: string
content:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutputMessageContent'
role:
type: string
const: assistant
default: assistant
status:
type: string
type:
type: string
const: message
default: message
additionalProperties: false
required:
- id
- content
- role
- status
- type
title: OpenAIResponseOutputMessage
OpenAIResponseOutputMessageContent:
type: object
properties:
text:
type: string
type:
type: string
const: output_text
default: output_text
additionalProperties: false
required:
- text
- type
title: >-
OpenAIResponseOutputMessageContentOutputText
"OpenAIResponseOutputMessageWebSearchToolCall":
type: object
properties:
id:
type: string
status:
type: string
type:
type: string
const: web_search_call
default: web_search_call
additionalProperties: false
required:
- id
- status
- type
title: >-
OpenAIResponseOutputMessageWebSearchToolCall
function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
OpenAIResponseObjectStream:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'


@@ -31,7 +31,7 @@ from llama_stack.apis.tools import ToolDef
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from .openai_responses import (
OpenAIResponseInputMessage,
OpenAIResponseInput,
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponseObjectStream,
@@ -593,7 +593,7 @@ class Agents(Protocol):
@webmethod(route="/openai/v1/responses", method="POST")
async def create_openai_response(
self,
input: str | list[OpenAIResponseInputMessage],
input: str | list[OpenAIResponseInput],
model: str,
previous_response_id: str | None = None,
store: bool | None = True,


@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Annotated, Literal
from typing import Annotated, Any, Literal
from pydantic import BaseModel, Field
@@ -17,6 +17,28 @@ class OpenAIResponseError(BaseModel):
message: str
@json_schema_type
class OpenAIResponseInputMessageContentText(BaseModel):
text: str
type: Literal["input_text"] = "input_text"
@json_schema_type
class OpenAIResponseInputMessageContentImage(BaseModel):
detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
type: Literal["input_image"] = "input_image"
# TODO: handle file_id
image_url: str | None = None
# TODO: handle file content types
OpenAIResponseInputMessageContent = Annotated[
OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage,
Field(discriminator="type"),
]
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
@json_schema_type
class OpenAIResponseOutputMessageContentOutputText(BaseModel):
text: str
@@ -31,13 +53,22 @@ register_schema(OpenAIResponseOutputMessageContent, name="OpenAIResponseOutputMe
@json_schema_type
class OpenAIResponseOutputMessage(BaseModel):
id: str
content: list[OpenAIResponseOutputMessageContent]
role: Literal["assistant"] = "assistant"
status: str
class OpenAIResponseMessage(BaseModel):
"""
Corresponds to the various Message types in the Responses API.
They are all under one type because the Responses API gives them all
the same "type" value, and there is no way to tell them apart in certain
scenarios.
"""
content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]
role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
type: Literal["message"] = "message"
# The fields below are not used in all scenarios, but are required in others.
id: str | None = None
status: str | None = None
@json_schema_type
class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
@@ -46,8 +77,18 @@ class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
type: Literal["web_search_call"] = "web_search_call"
@json_schema_type
class OpenAIResponseOutputMessageFunctionToolCall(BaseModel):
arguments: str
call_id: str
name: str
type: Literal["function_call"] = "function_call"
id: str
status: str
OpenAIResponseOutput = Annotated[
OpenAIResponseOutputMessage | OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseMessage | OpenAIResponseOutputMessageWebSearchToolCall | OpenAIResponseOutputMessageFunctionToolCall,
Field(discriminator="type"),
]
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
@@ -90,32 +131,29 @@ register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
@json_schema_type
class OpenAIResponseInputMessageContentText(BaseModel):
text: str
type: Literal["input_text"] = "input_text"
class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
"""
This represents the output of a function call that gets passed back to the model.
"""
call_id: str
output: str
type: Literal["function_call_output"] = "function_call_output"
id: str | None = None
status: str | None = None
@json_schema_type
class OpenAIResponseInputMessageContentImage(BaseModel):
detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
type: Literal["input_image"] = "input_image"
# TODO: handle file_id
image_url: str | None = None
# TODO: handle file content types
OpenAIResponseInputMessageContent = Annotated[
OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage,
Field(discriminator="type"),
OpenAIResponseInput = Annotated[
# Responses API allows output messages to be passed in as input
OpenAIResponseOutputMessageWebSearchToolCall
| OpenAIResponseOutputMessageFunctionToolCall
| OpenAIResponseInputFunctionToolCallOutput
|
# Fallback to the generic message type as a last resort
OpenAIResponseMessage,
Field(union_mode="left_to_right"),
]
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
@json_schema_type
class OpenAIResponseInputMessage(BaseModel):
content: str | list[OpenAIResponseInputMessageContent]
role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
type: Literal["message"] | None = "message"
register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
@json_schema_type
@@ -126,8 +164,35 @@ class OpenAIResponseInputToolWebSearch(BaseModel):
# TODO: add user_location
@json_schema_type
class OpenAIResponseInputToolFunction(BaseModel):
type: Literal["function"] = "function"
name: str
description: str | None = None
parameters: dict[str, Any] | None
strict: bool | None = None
class FileSearchRankingOptions(BaseModel):
ranker: str | None = None
score_threshold: float | None = Field(default=0.0, ge=0.0, le=1.0)
@json_schema_type
class OpenAIResponseInputToolFileSearch(BaseModel):
type: Literal["file_search"] = "file_search"
vector_store_id: list[str]
ranking_options: FileSearchRankingOptions | None = None
# TODO: add filters
OpenAIResponseInputTool = Annotated[
OpenAIResponseInputToolWebSearch,
OpenAIResponseInputToolWebSearch | OpenAIResponseInputToolFileSearch | OpenAIResponseInputToolFunction,
Field(discriminator="type"),
]
register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
class OpenAIResponseInputItemList(BaseModel):
data: list[OpenAIResponseInput]
object: Literal["list"] = "list"


@@ -18,6 +18,7 @@ from importlib.metadata import version as parse_version
from pathlib import Path
from typing import Annotated, Any
import rich.pretty
import yaml
from fastapi import Body, FastAPI, HTTPException, Request
from fastapi import Path as FastapiPath
@@ -187,11 +188,30 @@ async def sse_generator(event_gen_coroutine):
)
async def log_request_pre_validation(request: Request):
if request.method in ("POST", "PUT", "PATCH"):
try:
body_bytes = await request.body()
if body_bytes:
try:
parsed_body = json.loads(body_bytes.decode())
log_output = rich.pretty.pretty_repr(parsed_body)
except (json.JSONDecodeError, UnicodeDecodeError):
log_output = repr(body_bytes)
logger.debug(f"Incoming raw request body for {request.method} {request.url.path}:\n{log_output}")
else:
logger.debug(f"Incoming {request.method} {request.url.path} request with empty body.")
except Exception as e:
logger.warning(f"Could not read or log request body for {request.method} {request.url.path}: {e}")
def create_dynamic_typed_route(func: Any, method: str, route: str):
async def endpoint(request: Request, **kwargs):
# Get auth attributes from the request scope
user_attributes = request.scope.get("user_attributes", {})
await log_request_pre_validation(request)
# Use context manager with both provider data and auth attributes
with request_provider_data_context(request.headers, user_attributes):
is_streaming = is_streaming_request(func.__name__, request, **kwargs)


@@ -20,7 +20,7 @@ from llama_stack.apis.agents import (
AgentTurnCreateRequest,
AgentTurnResumeRequest,
Document,
OpenAIResponseInputMessage,
OpenAIResponseInput,
OpenAIResponseInputTool,
OpenAIResponseObject,
Session,
@@ -311,7 +311,7 @@ class MetaReferenceAgentsImpl(Agents):
async def create_openai_response(
self,
input: str | list[OpenAIResponseInputMessage],
input: str | list[OpenAIResponseInput],
model: str,
previous_response_id: str | None = None,
store: bool | None = True,


@@ -10,19 +10,26 @@ from collections.abc import AsyncIterator
from typing import cast
from openai.types.chat import ChatCompletionToolParam
from pydantic import BaseModel
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputMessage,
OpenAIResponseInput,
OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputItemList,
OpenAIResponseInputMessageContent,
OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool,
OpenAIResponseInputToolFunction,
OpenAIResponseMessage,
OpenAIResponseObject,
OpenAIResponseObjectStream,
OpenAIResponseObjectStreamResponseCompleted,
OpenAIResponseObjectStreamResponseCreated,
OpenAIResponseOutput,
OpenAIResponseOutputMessage,
OpenAIResponseOutputMessageContent,
OpenAIResponseOutputMessageContentOutputText,
OpenAIResponseOutputMessageFunctionToolCall,
OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.inference.inference import (
@@ -32,10 +39,13 @@ from llama_stack.apis.inference.inference import (
OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartParam,
OpenAIChatCompletionContentPartTextParam,
OpenAIChatCompletionToolCall,
OpenAIChatCompletionToolCallFunction,
OpenAIChoice,
OpenAIDeveloperMessageParam,
OpenAIImageURL,
OpenAIMessageParam,
OpenAISystemMessageParam,
OpenAIToolMessageParam,
OpenAIUserMessageParam,
)
@@ -50,31 +60,110 @@ logger = get_logger(name=__name__, category="openai_responses")
OPENAI_RESPONSES_PREFIX = "openai_responses:"
async def _previous_response_to_messages(previous_response: OpenAIResponseObject) -> list[OpenAIMessageParam]:
async def _convert_response_content_to_chat_content(
content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent],
) -> str | list[OpenAIChatCompletionContentPartParam]:
"""
Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
The content schemas of each API look similar, but are not exactly the same.
"""
if isinstance(content, str):
return content
converted_parts = []
for content_part in content:
if isinstance(content_part, OpenAIResponseInputMessageContentText):
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText):
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
elif isinstance(content_part, OpenAIResponseInputMessageContentImage):
if content_part.image_url:
image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail)
converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
elif isinstance(content_part, str):
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part))
else:
raise ValueError(
f"Llama Stack OpenAI Responses does not yet support content type '{type(content_part)}' in this context"
)
return converted_parts
async def _convert_response_input_to_chat_messages(
input: str | list[OpenAIResponseInput],
) -> list[OpenAIMessageParam]:
"""
Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages.
"""
messages: list[OpenAIMessageParam] = []
for output_message in previous_response.output:
if isinstance(output_message, OpenAIResponseOutputMessage):
messages.append(OpenAIAssistantMessageParam(content=output_message.content[0].text))
if isinstance(input, list):
for input_item in input:
if isinstance(input_item, OpenAIResponseInputFunctionToolCallOutput):
messages.append(
OpenAIToolMessageParam(
content=input_item.output,
tool_call_id=input_item.call_id,
)
)
elif isinstance(input_item, OpenAIResponseOutputMessageFunctionToolCall):
tool_call = OpenAIChatCompletionToolCall(
index=0,
id=input_item.call_id,
function=OpenAIChatCompletionToolCallFunction(
name=input_item.name,
arguments=input_item.arguments,
),
)
messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call]))
else:
content = await _convert_response_content_to_chat_content(input_item.content)
message_type = await _get_message_type_by_role(input_item.role)
if message_type is None:
raise ValueError(
f"Llama Stack OpenAI Responses does not yet support message role '{input_item.role}' in this context"
)
messages.append(message_type(content=content))
else:
messages.append(OpenAIUserMessageParam(content=input))
return messages
async def _openai_choices_to_output_messages(choices: list[OpenAIChoice]) -> list[OpenAIResponseOutputMessage]:
output_messages = []
for choice in choices:
async def _convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenAIResponseMessage:
"""
Convert an OpenAI Chat Completion choice into an OpenAI Response output message.
"""
output_content = ""
if isinstance(choice.message.content, str):
output_content = choice.message.content
elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam):
output_content = choice.message.content.text
# TODO: handle image content
output_messages.append(
OpenAIResponseOutputMessage(
else:
raise ValueError(
f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}"
)
return OpenAIResponseMessage(
id=f"msg_{uuid.uuid4()}",
content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
status="completed",
role="assistant",
)
)
return output_messages
async def _get_message_type_by_role(role: str):
role_to_type = {
"user": OpenAIUserMessageParam,
"system": OpenAISystemMessageParam,
"assistant": OpenAIAssistantMessageParam,
"developer": OpenAIDeveloperMessageParam,
}
return role_to_type.get(role)
class OpenAIResponsePreviousResponseWithInputItems(BaseModel):
input_items: OpenAIResponseInputItemList
response: OpenAIResponseObject
class OpenAIResponsesImpl:
@@ -90,19 +179,45 @@ class OpenAIResponsesImpl:
self.tool_groups_api = tool_groups_api
self.tool_runtime_api = tool_runtime_api
async def get_openai_response(
self,
id: str,
) -> OpenAIResponseObject:
async def _get_previous_response_with_input(self, id: str) -> OpenAIResponsePreviousResponseWithInputItems:
key = f"{OPENAI_RESPONSES_PREFIX}{id}"
response_json = await self.persistence_store.get(key=key)
if response_json is None:
raise ValueError(f"OpenAI response with id '{id}' not found")
return OpenAIResponseObject.model_validate_json(response_json)
return OpenAIResponsePreviousResponseWithInputItems.model_validate_json(response_json)
async def _prepend_previous_response(
self, input: str | list[OpenAIResponseInput], previous_response_id: str | None = None
):
if previous_response_id:
previous_response_with_input = await self._get_previous_response_with_input(previous_response_id)
# previous response input items
new_input_items = previous_response_with_input.input_items.data
# previous response output items
new_input_items.extend(previous_response_with_input.response.output)
# new input items from the current request
if isinstance(input, str):
new_input_items.append(OpenAIResponseMessage(content=input, role="user"))
else:
new_input_items.extend(input)
input = new_input_items
return input
async def get_openai_response(
self,
id: str,
) -> OpenAIResponseObject:
response_with_input = await self._get_previous_response_with_input(id)
return response_with_input.response
async def create_openai_response(
self,
input: str | list[OpenAIResponseInputMessage],
input: str | list[OpenAIResponseInput],
model: str,
previous_response_id: str | None = None,
store: bool | None = True,
@@ -112,31 +227,8 @@ class OpenAIResponsesImpl:
):
stream = False if stream is None else stream
messages: list[OpenAIMessageParam] = []
if previous_response_id:
previous_response = await self.get_openai_response(previous_response_id)
messages.extend(await _previous_response_to_messages(previous_response))
# TODO: refactor this user_content parsing out into a separate method
user_content: str | list[OpenAIChatCompletionContentPartParam] = ""
if isinstance(input, list):
user_content = []
for user_input in input:
if isinstance(user_input.content, list):
for user_input_content in user_input.content:
if isinstance(user_input_content, OpenAIResponseInputMessageContentText):
user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input_content.text))
elif isinstance(user_input_content, OpenAIResponseInputMessageContentImage):
if user_input_content.image_url:
image_url = OpenAIImageURL(
url=user_input_content.image_url, detail=user_input_content.detail
)
user_content.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
else:
user_content.append(OpenAIChatCompletionContentPartTextParam(text=user_input.content))
else:
user_content = input
messages.append(OpenAIUserMessageParam(content=user_content))
input = await self._prepend_previous_response(input, previous_response_id)
messages = await _convert_response_input_to_chat_messages(input)
chat_tools = await self._convert_response_tools_to_chat_tools(tools) if tools else None
chat_response = await self.inference_api.openai_chat_completion(
model=model,
@@ -150,6 +242,7 @@ class OpenAIResponsesImpl:
# TODO: refactor this into a separate method that handles streaming
chat_response_id = ""
chat_response_content = []
chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {}
# TODO: these chunk_ fields are hacky and only take the last chunk into account
chunk_created = 0
chunk_model = ""
@@ -163,7 +256,26 @@ class OpenAIResponsesImpl:
chat_response_content.append(chunk_choice.delta.content or "")
if chunk_choice.finish_reason:
chunk_finish_reason = chunk_choice.finish_reason
assistant_message = OpenAIAssistantMessageParam(content="".join(chat_response_content))
# Aggregate tool call arguments across chunks, using their index as the aggregation key
if chunk_choice.delta.tool_calls:
for tool_call in chunk_choice.delta.tool_calls:
response_tool_call = chat_response_tool_calls.get(tool_call.index, None)
if response_tool_call:
response_tool_call.function.arguments += tool_call.function.arguments
else:
response_tool_call = OpenAIChatCompletionToolCall(**tool_call.model_dump())
chat_response_tool_calls[tool_call.index] = response_tool_call
# Convert the dict of tool calls by index to a list of tool calls to pass back in our response
if chat_response_tool_calls:
tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
else:
tool_calls = None
assistant_message = OpenAIAssistantMessageParam(
content="".join(chat_response_content),
tool_calls=tool_calls,
)
chat_response = OpenAIChatCompletion(
id=chat_response_id,
choices=[
@@ -181,12 +293,26 @@ class OpenAIResponsesImpl:
chat_response = OpenAIChatCompletion(**chat_response.model_dump())
output_messages: list[OpenAIResponseOutput] = []
if chat_response.choices[0].message.tool_calls:
output_messages.extend(
await self._execute_tool_and_return_final_output(model, stream, chat_response, messages, temperature)
for choice in chat_response.choices:
if choice.message.tool_calls and tools:
# Assume if the first tool is a function, all tools are functions
if isinstance(tools[0], OpenAIResponseInputToolFunction):
for tool_call in choice.message.tool_calls:
output_messages.append(
OpenAIResponseOutputMessageFunctionToolCall(
arguments=tool_call.function.arguments or "",
call_id=tool_call.id,
name=tool_call.function.name or "",
id=f"fc_{uuid.uuid4()}",
status="completed",
)
)
else:
output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices))
output_messages.extend(
await self._execute_tool_and_return_final_output(model, stream, choice, messages, temperature)
)
else:
output_messages.append(await _convert_chat_choice_to_response_message(choice))
response = OpenAIResponseObject(
created_at=chat_response.created,
id=f"resp-{uuid.uuid4()}",
@@ -195,13 +321,43 @@ class OpenAIResponsesImpl:
status="completed",
output=output_messages,
)
logger.debug(f"OpenAI Responses response: {response}")
if store:
# Store in kvstore
new_input_id = f"msg_{uuid.uuid4()}"
if isinstance(input, str):
# synthesize a message from the input string
input_content = OpenAIResponseInputMessageContentText(text=input)
input_content_item = OpenAIResponseMessage(
role="user",
content=[input_content],
id=new_input_id,
)
input_items_data = [input_content_item]
else:
# we already have a list of messages
input_items_data = []
for input_item in input:
if isinstance(input_item, OpenAIResponseMessage):
# These may or may not already have an id, so dump to dict, check for id, and add if missing
input_item_dict = input_item.model_dump()
if "id" not in input_item_dict:
input_item_dict["id"] = new_input_id
input_items_data.append(OpenAIResponseMessage(**input_item_dict))
else:
input_items_data.append(input_item)
input_items = OpenAIResponseInputItemList(data=input_items_data)
prev_response = OpenAIResponsePreviousResponseWithInputItems(
input_items=input_items,
response=response,
)
key = f"{OPENAI_RESPONSES_PREFIX}{response.id}"
await self.persistence_store.set(
key=key,
value=response.model_dump_json(),
value=prev_response.model_dump_json(),
)
if stream:
@@ -221,7 +377,9 @@ class OpenAIResponsesImpl:
chat_tools: list[ChatCompletionToolParam] = []
for input_tool in tools:
# TODO: Handle other tool types
if input_tool.type == "web_search":
if input_tool.type == "function":
chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
elif input_tool.type == "web_search":
tool_name = "web_search"
tool = await self.tool_groups_api.get_tool(tool_name)
tool_def = ToolDefinition(
@@ -247,12 +405,11 @@ class OpenAIResponsesImpl:
self,
model_id: str,
stream: bool,
chat_response: OpenAIChatCompletion,
choice: OpenAIChoice,
messages: list[OpenAIMessageParam],
temperature: float,
) -> list[OpenAIResponseOutput]:
output_messages: list[OpenAIResponseOutput] = []
choice = chat_response.choices[0]
# If the choice is not an assistant message, we don't need to execute any tools
if not isinstance(choice.message, OpenAIAssistantMessageParam):
@@ -262,6 +419,9 @@ class OpenAIResponsesImpl:
if not choice.message.tool_calls:
return output_messages
# Copy the messages list to avoid mutating the original list
messages = messages.copy()
# Add the assistant message with tool_calls response to the messages list
messages.append(choice.message)
@@ -307,7 +467,9 @@ class OpenAIResponsesImpl:
)
# type cast to appease mypy
tool_results_chat_response = cast(OpenAIChatCompletion, tool_results_chat_response)
tool_final_outputs = await _openai_choices_to_output_messages(tool_results_chat_response.choices)
tool_final_outputs = [
await _convert_chat_choice_to_response_message(choice) for choice in tool_results_chat_response.choices
]
# TODO: Wire in annotations with URLs, titles, etc to these output messages
output_messages.extend(tool_final_outputs)
return output_messages


@@ -0,0 +1,23 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
import yaml
from llama_stack.apis.inference.inference import (
OpenAIChatCompletion,
)
FIXTURES_DIR = os.path.dirname(os.path.abspath(__file__))
def load_chat_completion_fixture(filename: str) -> OpenAIChatCompletion:
fixture_path = os.path.join(FIXTURES_DIR, filename)
with open(fixture_path) as f:
data = yaml.safe_load(f)
return OpenAIChatCompletion(**data)


@@ -0,0 +1,9 @@
id: chat-completion-123
choices:
- message:
content: "Dublin"
role: assistant
finish_reason: stop
index: 0
created: 1234567890
model: meta-llama/Llama-3.1-8B-Instruct


@@ -0,0 +1,14 @@
id: chat-completion-123
choices:
- message:
tool_calls:
- id: tool_call_123
type: function
function:
name: web_search
arguments: '{"query":"What is the capital of Ireland?"}'
role: assistant
finish_reason: stop
index: 0
created: 1234567890
model: meta-llama/Llama-3.1-8B-Instruct


@@ -4,27 +4,32 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from unittest.mock import AsyncMock
from unittest.mock import AsyncMock, patch
import pytest
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputItemList,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputToolWebSearch,
OpenAIResponseOutputMessage,
OpenAIResponseMessage,
OpenAIResponseObject,
OpenAIResponseOutputMessageContentOutputText,
OpenAIResponseOutputMessageWebSearchToolCall,
)
from llama_stack.apis.inference.inference import (
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChatCompletionToolCall,
OpenAIChatCompletionToolCallFunction,
OpenAIChoice,
OpenAIChatCompletionContentPartTextParam,
OpenAIDeveloperMessageParam,
OpenAIUserMessageParam,
)
from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
OpenAIResponsePreviousResponseWithInputItems,
OpenAIResponsesImpl,
)
from llama_stack.providers.utils.kvstore import KVStore
from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture
@pytest.fixture
@@ -65,21 +70,11 @@ def openai_responses_impl(mock_kvstore, mock_inference_api, mock_tool_groups_api
async def test_create_openai_response_with_string_input(openai_responses_impl, mock_inference_api):
"""Test creating an OpenAI response with a simple string input."""
# Setup
input_text = "Hello, world!"
input_text = "What is the capital of Ireland?"
model = "meta-llama/Llama-3.1-8B-Instruct"
mock_chat_completion = OpenAIChatCompletion(
id="chat-completion-123",
choices=[
OpenAIChoice(
message=OpenAIAssistantMessageParam(content="Hello! How can I help you?"),
finish_reason="stop",
index=0,
)
],
created=1234567890,
model=model,
)
# Load the chat completion fixture
mock_chat_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
mock_inference_api.openai_chat_completion.return_value = mock_chat_completion
# Execute
@@ -92,7 +87,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
# Verify
mock_inference_api.openai_chat_completion.assert_called_once_with(
model=model,
messages=[OpenAIUserMessageParam(role="user", content="Hello, world!", name=None)],
messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)],
tools=None,
stream=False,
temperature=0.1,
@@ -100,55 +95,25 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
openai_responses_impl.persistence_store.set.assert_called_once()
assert result.model == model
assert len(result.output) == 1
assert isinstance(result.output[0], OpenAIResponseOutputMessage)
assert result.output[0].content[0].text == "Hello! How can I help you?"
assert isinstance(result.output[0], OpenAIResponseMessage)
assert result.output[0].content[0].text == "Dublin"
@pytest.mark.asyncio
async def test_create_openai_response_with_string_input_with_tools(openai_responses_impl, mock_inference_api):
"""Test creating an OpenAI response with a simple string input and tools."""
# Setup
input_text = "What was the score of todays game?"
input_text = "What is the capital of Ireland?"
model = "meta-llama/Llama-3.1-8B-Instruct"
mock_chat_completions = [
OpenAIChatCompletion(
id="chat-completion-123",
choices=[
OpenAIChoice(
message=OpenAIAssistantMessageParam(
tool_calls=[
OpenAIChatCompletionToolCall(
id="tool_call_123",
type="function",
function=OpenAIChatCompletionToolCallFunction(
name="web_search", arguments='{"query":"What was the score of todays game?"}'
),
)
],
),
finish_reason="stop",
index=0,
)
],
created=1234567890,
model=model,
),
OpenAIChatCompletion(
id="chat-completion-123",
choices=[
OpenAIChoice(
message=OpenAIAssistantMessageParam(content="The score of todays game was 10-12"),
finish_reason="stop",
index=0,
)
],
created=1234567890,
model=model,
),
]
# Load the chat completion fixtures
tool_call_completion = load_chat_completion_fixture("tool_call_completion.yaml")
tool_response_completion = load_chat_completion_fixture("simple_chat_completion.yaml")
mock_inference_api.openai_chat_completion.side_effect = mock_chat_completions
mock_inference_api.openai_chat_completion.side_effect = [
tool_call_completion,
tool_response_completion,
]
openai_responses_impl.tool_groups_api.get_tool.return_value = Tool(
identifier="web_search",
@@ -163,7 +128,7 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
openai_responses_impl.tool_runtime_api.invoke_tool.return_value = ToolInvocationResult(
status="completed",
content="The score of todays game was 10-12",
content="Dublin",
)
# Execute
@@ -180,23 +145,172 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
# Verify
first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
assert first_call.kwargs["messages"][0].content == "What was the score of todays game?"
assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?"
assert first_call.kwargs["tools"] is not None
assert first_call.kwargs["temperature"] == 0.1
second_call = mock_inference_api.openai_chat_completion.call_args_list[1]
assert second_call.kwargs["messages"][-1].content == "The score of todays game was 10-12"
assert second_call.kwargs["messages"][-1].content == "Dublin"
assert second_call.kwargs["temperature"] == 0.1
openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
tool_name="web_search",
kwargs={"query": "What was the score of todays game?"},
kwargs={"query": "What is the capital of Ireland?"},
)
openai_responses_impl.persistence_store.set.assert_called_once()
# Check that we got the content from our mocked tool execution result
assert len(result.output) >= 1
assert isinstance(result.output[1], OpenAIResponseOutputMessage)
assert result.output[1].content[0].text == "The score of todays game was 10-12"
assert isinstance(result.output[1], OpenAIResponseMessage)
assert result.output[1].content[0].text == "Dublin"
@pytest.mark.asyncio
async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api):
"""Test creating an OpenAI response with multiple messages."""
# Setup
input_messages = [
OpenAIResponseMessage(role="developer", content="You are a helpful assistant", name=None),
OpenAIResponseMessage(role="user", content="Name some towns in Ireland", name=None),
OpenAIResponseMessage(
role="assistant",
content=[
OpenAIResponseInputMessageContentText(text="Galway, Longford, Sligo"),
OpenAIResponseInputMessageContentText(text="Dublin"),
],
name=None,
),
OpenAIResponseMessage(role="user", content="Which is the largest town in Ireland?", name=None),
]
model = "meta-llama/Llama-3.1-8B-Instruct"
mock_inference_api.openai_chat_completion.return_value = load_chat_completion_fixture("simple_chat_completion.yaml")
# Execute
await openai_responses_impl.create_openai_response(
input=input_messages,
model=model,
temperature=0.1,
)
# Verify that the correct messages were sent to the inference API, i.e.
# all of the Responses messages were converted to chat completion message objects
inference_messages = mock_inference_api.openai_chat_completion.call_args_list[0].kwargs["messages"]
for i, m in enumerate(input_messages):
if isinstance(m.content, str):
assert inference_messages[i].content == m.content
else:
assert inference_messages[i].content[0].text == m.content[0].text
assert isinstance(inference_messages[i].content[0], OpenAIChatCompletionContentPartTextParam)
assert inference_messages[i].role == m.role
if m.role == "user":
assert isinstance(inference_messages[i], OpenAIUserMessageParam)
elif m.role == "assistant":
assert isinstance(inference_messages[i], OpenAIAssistantMessageParam)
else:
assert isinstance(inference_messages[i], OpenAIDeveloperMessageParam)
@pytest.mark.asyncio
async def test_prepend_previous_response_none(openai_responses_impl):
"""Test prepending no previous response to a new response."""
input = await openai_responses_impl._prepend_previous_response("fake_input", None)
assert input == "fake_input"
@pytest.mark.asyncio
@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
async def test_prepend_previous_response_basic(get_previous_response_with_input, openai_responses_impl):
"""Test prepending a basic previous response to a new response."""
input_item_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
role="user",
)
input_items = OpenAIResponseInputItemList(data=[input_item_message])
response_output_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseOutputMessageContentOutputText(text="fake_response")],
status="completed",
role="assistant",
)
response = OpenAIResponseObject(
created_at=1,
id="resp_123",
model="fake_model",
output=[response_output_message],
status="completed",
)
previous_response = OpenAIResponsePreviousResponseWithInputItems(
input_items=input_items,
response=response,
)
get_previous_response_with_input.return_value = previous_response
input = await openai_responses_impl._prepend_previous_response("fake_input", "resp_123")
assert len(input) == 3
# Check for previous input
assert isinstance(input[0], OpenAIResponseMessage)
assert input[0].content[0].text == "fake_previous_input"
# Check for previous output
assert isinstance(input[1], OpenAIResponseMessage)
assert input[1].content[0].text == "fake_response"
# Check for new input
assert isinstance(input[2], OpenAIResponseMessage)
assert input[2].content == "fake_input"
@pytest.mark.asyncio
@patch.object(OpenAIResponsesImpl, "_get_previous_response_with_input")
async def test_prepend_previous_response_web_search(get_previous_response_with_input, openai_responses_impl):
"""Test prepending a web search previous response to a new response."""
input_item_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseInputMessageContentText(text="fake_previous_input")],
role="user",
)
input_items = OpenAIResponseInputItemList(data=[input_item_message])
output_web_search = OpenAIResponseOutputMessageWebSearchToolCall(
id="ws_123",
status="completed",
)
output_message = OpenAIResponseMessage(
id="123",
content=[OpenAIResponseOutputMessageContentOutputText(text="fake_web_search_response")],
status="completed",
role="assistant",
)
response = OpenAIResponseObject(
created_at=1,
id="resp_123",
model="fake_model",
output=[output_web_search, output_message],
status="completed",
)
previous_response = OpenAIResponsePreviousResponseWithInputItems(
input_items=input_items,
response=response,
)
get_previous_response_with_input.return_value = previous_response
input_messages = [OpenAIResponseMessage(content="fake_input", role="user")]
input = await openai_responses_impl._prepend_previous_response(input_messages, "resp_123")
assert len(input) == 4
# Check for previous input
assert isinstance(input[0], OpenAIResponseMessage)
assert input[0].content[0].text == "fake_previous_input"
# Check for previous output web search tool call
assert isinstance(input[1], OpenAIResponseOutputMessageWebSearchToolCall)
# Check for previous output web search response
assert isinstance(input[2], OpenAIResponseMessage)
assert input[2].content[0].text == "fake_web_search_response"
# Check for new input
assert isinstance(input[3], OpenAIResponseMessage)
assert input[3].content == "fake_input"


@@ -31,6 +31,26 @@ test_response_web_search:
search_context_size: "low"
output: "128"
test_response_custom_tool:
test_name: test_response_custom_tool
test_params:
case:
- case_id: "sf_weather"
input: "What's the weather like in San Francisco?"
tools:
- type: function
name: get_weather
description: Get current temperature for a given location.
parameters:
additionalProperties: false
properties:
location:
description: "City and country e.g. Bogot\xE1, Colombia"
type: string
required:
- location
type: object
test_response_image:
test_name: test_response_image
test_params:


@@ -124,6 +124,28 @@ def test_response_non_streaming_web_search(request, openai_client, model, provid
assert case["output"].lower() in response.output_text.lower().strip()
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_custom_tool"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_custom_tool(request, openai_client, model, provider, verification_config, case):
test_name_base = get_base_test_name(request)
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
response = openai_client.responses.create(
model=model,
input=case["input"],
tools=case["tools"],
stream=False,
)
assert len(response.output) == 1
assert response.output[0].type == "function_call"
assert response.output[0].status == "completed"
assert response.output[0].name == "get_weather"
@pytest.mark.parametrize(
"case",
responses_test_cases["test_response_image"]["test_params"]["case"],