feat(responses)!: add reasoning and annotation added events (#3793)

Implements missing streaming events from OpenAI Responses API spec: 
 - reasoning text/summary events for o1/o3 models, 
 - refusal events for safety moderation
 - annotation events for citations, 
 - and file search streaming events. 
 
Added optional reasoning_content field to chat completion chunks to
support non-standard provider extensions.

**NOTE:** OpenAI does _not_ fill reasoning_content when users use the
chat_completion APIs. This means there is no way for us to implement
Responses (with reasoning) by using OpenAI chat completions! We'd need
to transparently punt to OpenAI's responses endpoints if we wish to do
that. For others though (vLLM, etc.) we can use it.

## Test Plan

File search streaming test passes:
```
./scripts/integration-tests.sh --stack-config server:ci-tests \
   --suite responses --setup gpt --inference-mode replay --pattern test_response_file_search_streaming_events
```

Need more complex setup and validation for reasoning tests (need a vLLM
powered OSS model maybe gpt-oss which can return reasoning_content). I
will do that in a followup PR.
This commit is contained in:
Ashwin Bharambe 2025-10-11 16:47:14 -07:00 committed by GitHub
parent f365961731
commit 7c63aebd64
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 23530 additions and 2 deletions

View file

@ -0,0 +1,364 @@
{
"test_id": "tests/integration/responses/test_file_search.py::test_response_file_search_streaming_events[client_with_models-txt=openai/gpt-4o]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What are the marketing updates?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3ae0877c874c",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_yrecP7RchFwSrzeZd9oqtJEN",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "FO68"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3ae0877c874c",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "TJ3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3ae0877c874c",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "4"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3ae0877c874c",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "r"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3ae0877c874c",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "marketing",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "uV08LeJq8kRvD"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3ae0877c874c",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " updates",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "7ouSqhLcxzdGuA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3ae0877c874c",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "uug"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3ae0877c874c",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "jBmu"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3ae0877c874c",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 16,
"prompt_tokens": 66,
"total_tokens": 82,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "9QmaFTrTZ7uhZnV"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,512 @@
{
"test_id": "tests/integration/responses/test_file_search.py::test_response_file_search_streaming_events[openai_client-txt=openai/gpt-4o]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What are the marketing updates?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_Vdxnbo2D8ds3BuKCon8XUt9P",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "ypKL"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "sKu"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "B"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "R"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "marketing",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "d8dj126pSbu16"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " updates",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "dKd51Xg2hSEbZR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " October",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "nQtsJsUFk8Ku6B"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "fEjZX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "202",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Oht"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "3",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "eEjRs"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "DFm"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "ZDAJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-99de86ac838b",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 20,
"prompt_tokens": 66,
"total_tokens": 86,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "TAto5fU1PVZJ7fG"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -318,3 +318,48 @@ def test_response_file_search_filter_compound_or(compat_client, text_model_id, v
# Verify we got at least one of the expected categories
assert len(categories_found) > 0, "Should have found at least one marketing or sales file"
assert categories_found.issubset({"marketing", "sales"}), f"Found unexpected categories: {categories_found}"
def test_response_file_search_streaming_events(compat_client, text_model_id, vector_store_with_filtered_files):
"""Test that file search emits proper streaming events (in_progress, searching, completed)."""
tools = [
{
"type": "file_search",
"vector_store_ids": [vector_store_with_filtered_files.id],
}
]
stream = compat_client.responses.create(
model=text_model_id,
input="What are the marketing updates?",
tools=tools,
stream=True,
)
chunks = []
for chunk in stream:
chunks.append(chunk)
event_types = [chunk.type for chunk in chunks]
# Verify file search streaming events are present
file_search_in_progress = [chunk for chunk in chunks if chunk.type == "response.file_search_call.in_progress"]
file_search_searching = [chunk for chunk in chunks if chunk.type == "response.file_search_call.searching"]
file_search_completed = [chunk for chunk in chunks if chunk.type == "response.file_search_call.completed"]
assert len(file_search_in_progress) > 0, (
f"Expected response.file_search_call.in_progress events, got chunk types: {event_types}"
)
assert len(file_search_searching) > 0, (
f"Expected response.file_search_call.searching events, got chunk types: {event_types}"
)
assert len(file_search_completed) > 0, (
f"Expected response.file_search_call.completed events, got chunk types: {event_types}"
)
# Verify final response has file search call
final_chunk = chunks[-1]
if hasattr(final_chunk, "response"):
file_search_calls = [output for output in final_chunk.response.output if output.type == "file_search_call"]
assert len(file_search_calls) > 0, "Expected at least one file_search_call in final response"
assert file_search_calls[0].status == "completed"