feat(responses): add more streaming response types (#2375)

Ashwin Bharambe 2025-06-03 15:48:41 -07:00 committed by GitHub
parent d96f6ec763
commit ed69c1b3cc
4 changed files with 1003 additions and 14 deletions


@@ -179,6 +179,30 @@ class OpenAIResponseObjectStreamResponseCreated(BaseModel):
     type: Literal["response.created"] = "response.created"
 
 
+@json_schema_type
+class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
+    response: OpenAIResponseObject
+    type: Literal["response.completed"] = "response.completed"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel):
+    response_id: str
+    item: OpenAIResponseOutput
+    output_index: int
+    sequence_number: int
+    type: Literal["response.output_item.added"] = "response.output_item.added"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel):
+    response_id: str
+    item: OpenAIResponseOutput
+    output_index: int
+    sequence_number: int
+    type: Literal["response.output_item.done"] = "response.output_item.done"
+
+
 @json_schema_type
 class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
     content_index: int
@@ -190,14 +214,132 @@ class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
 
 
 @json_schema_type
-class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
-    response: OpenAIResponseObject
-    type: Literal["response.completed"] = "response.completed"
+class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):
+    content_index: int
+    text: str  # final text of the output item
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.output_text.done"] = "response.output_text.done"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel):
+    delta: str
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel):
+    arguments: str  # final arguments of the function call
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseWebSearchCallInProgress(BaseModel):
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.web_search_call.in_progress"] = "response.web_search_call.in_progress"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseWebSearchCallSearching(BaseModel):
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.web_search_call.searching"] = "response.web_search_call.searching"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseWebSearchCallCompleted(BaseModel):
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.web_search_call.completed"] = "response.web_search_call.completed"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpListToolsInProgress(BaseModel):
+    sequence_number: int
+    type: Literal["response.mcp_list_tools.in_progress"] = "response.mcp_list_tools.in_progress"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpListToolsFailed(BaseModel):
+    sequence_number: int
+    type: Literal["response.mcp_list_tools.failed"] = "response.mcp_list_tools.failed"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpListToolsCompleted(BaseModel):
+    sequence_number: int
+    type: Literal["response.mcp_list_tools.completed"] = "response.mcp_list_tools.completed"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta(BaseModel):
+    delta: str
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.mcp_call.arguments.delta"] = "response.mcp_call.arguments.delta"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpCallArgumentsDone(BaseModel):
+    arguments: str  # final arguments of the MCP call
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.mcp_call.arguments.done"] = "response.mcp_call.arguments.done"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel):
+    item_id: str
+    output_index: int
+    sequence_number: int
+    type: Literal["response.mcp_call.in_progress"] = "response.mcp_call.in_progress"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpCallFailed(BaseModel):
+    sequence_number: int
+    type: Literal["response.mcp_call.failed"] = "response.mcp_call.failed"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel):
+    sequence_number: int
+    type: Literal["response.mcp_call.completed"] = "response.mcp_call.completed"
+
+
 OpenAIResponseObjectStream = Annotated[
     OpenAIResponseObjectStreamResponseCreated
+    | OpenAIResponseObjectStreamResponseOutputItemAdded
+    | OpenAIResponseObjectStreamResponseOutputItemDone
     | OpenAIResponseObjectStreamResponseOutputTextDelta
+    | OpenAIResponseObjectStreamResponseOutputTextDone
+    | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
+    | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
+    | OpenAIResponseObjectStreamResponseWebSearchCallInProgress
+    | OpenAIResponseObjectStreamResponseWebSearchCallSearching
+    | OpenAIResponseObjectStreamResponseWebSearchCallCompleted
+    | OpenAIResponseObjectStreamResponseMcpListToolsInProgress
+    | OpenAIResponseObjectStreamResponseMcpListToolsFailed
+    | OpenAIResponseObjectStreamResponseMcpListToolsCompleted
+    | OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta
+    | OpenAIResponseObjectStreamResponseMcpCallArgumentsDone
+    | OpenAIResponseObjectStreamResponseMcpCallInProgress
+    | OpenAIResponseObjectStreamResponseMcpCallFailed
+    | OpenAIResponseObjectStreamResponseMcpCallCompleted
+    | OpenAIResponseObjectStreamResponseCompleted,
     Field(discriminator="type"),
 ]
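
Every event carries a distinct `type` literal and `OpenAIResponseObjectStream` discriminates on it, so a client can validate a raw streamed payload directly into the right model and branch on `type`. A minimal consumer sketch, assuming pydantic v2; the import path is an assumption and may differ in your tree:

from pydantic import TypeAdapter

# Assumed import path for the models added in the diff above.
from llama_stack.apis.agents.openai_responses import OpenAIResponseObjectStream

adapter = TypeAdapter(OpenAIResponseObjectStream)


def handle_event(raw_json: str) -> None:
    # validate_json resolves the concrete event class via the "type" discriminator.
    event = adapter.validate_json(raw_json)
    if event.type == "response.output_text.done":
        print(event.text)  # final text of the output item
    elif event.type == "response.function_call_arguments.done":
        print(f"tool args: {event.arguments}")
    elif event.type == "response.completed":
        print(f"response {event.response.id} finished")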


@@ -433,12 +433,10 @@ class OpenAIResponsesImpl:
         store: bool | None,
         text: OpenAIResponseText,
         tools: list[OpenAIResponseInputTool] | None,
-        max_infer_iters: int | None,
+        max_infer_iters: int,
     ) -> OpenAIResponseObject:
         # Implement tool execution loop - handle ALL inference rounds including the first
         n_iter = 0
         messages = ctx.messages.copy()
-        current_response = None
 
         while True:
             # Do inference (including the first one)
@@ -450,13 +448,13 @@
                 temperature=ctx.temperature,
                 response_format=ctx.response_format,
             )
-            current_response = OpenAIChatCompletion(**inference_result.model_dump())
+            completion = OpenAIChatCompletion(**inference_result.model_dump())
 
             # Separate function vs non-function tool calls
             function_tool_calls = []
             non_function_tool_calls = []
-            for choice in current_response.choices:
+            for choice in completion.choices:
                 if choice.message.tool_calls and tools:
                     for tool_call in choice.message.tool_calls:
                         if self._is_function_tool_call(tool_call, tools):
@@ -468,7 +466,7 @@
             if function_tool_calls:
                 # For function tool calls, use existing logic and return immediately
                 current_output_messages = await self._process_response_choices(
-                    chat_response=current_response,
+                    chat_response=completion,
                     ctx=ctx,
                     tools=tools,
                 )
@@ -476,7 +474,7 @@
                 break
             elif non_function_tool_calls:
                 # For non-function tool calls, execute them and continue loop
-                for choice in current_response.choices:
+                for choice in completion.choices:
                     tool_outputs, tool_response_messages = await self._execute_tool_calls_only(choice, ctx)
                     output_messages.extend(tool_outputs)
@@ -485,19 +483,19 @@
                     messages.extend(tool_response_messages)
 
                 n_iter += 1
-                if n_iter >= (max_infer_iters or 10):
+                if n_iter >= max_infer_iters:
                     break
 
                 # Continue with next iteration of the loop
                 continue
             else:
                 # No tool calls - convert response to message and we're done
-                for choice in current_response.choices:
+                for choice in completion.choices:
                     output_messages.append(await _convert_chat_choice_to_response_message(choice))
                 break
 
         response = OpenAIResponseObject(
-            created_at=current_response.created,
+            created_at=completion.created,
             id=f"resp-{uuid.uuid4()}",
             model=model,
             object="response",
@@ -549,7 +547,6 @@ class OpenAIResponsesImpl:
         messages = ctx.messages.copy()
 
         while True:
             # Do inference (including the first one) - streaming
             current_inference_result = await self.inference_api.openai_chat_completion(
                 model=ctx.model,
                 messages=messages,
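
Together with the streaming method above, the new event types let a producer narrate the full lifecycle of an output item. A hypothetical emitter sketch follows: the generator and `item.id` are illustrative, and the `delta`/`item_id` fields of `OpenAIResponseObjectStreamResponseOutputTextDelta` come from its pre-existing definition, not this diff.

from collections.abc import AsyncIterator


# Hypothetical generator showing the intended ordering of the new events for a
# single text output item; this is not the actual implementation in this file.
async def emit_text_item(response_id, item, chunks) -> AsyncIterator[OpenAIResponseObjectStream]:
    seq = 0
    yield OpenAIResponseObjectStreamResponseOutputItemAdded(
        response_id=response_id, item=item, output_index=0, sequence_number=seq
    )
    text = ""
    for chunk in chunks:
        seq += 1
        text += chunk
        yield OpenAIResponseObjectStreamResponseOutputTextDelta(
            content_index=0, delta=chunk, item_id=item.id, output_index=0, sequence_number=seq
        )
    seq += 1
    yield OpenAIResponseObjectStreamResponseOutputTextDone(
        content_index=0, text=text, item_id=item.id, output_index=0, sequence_number=seq
    )
    seq += 1
    yield OpenAIResponseObjectStreamResponseOutputItemDone(
        response_id=response_id, item=item, output_index=0, sequence_number=seq
    )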