Implement include parameter specifically for adding logprobs in the output message

This commit is contained in:
Shabana Baig 2025-11-30 11:53:57 -05:00
parent 4ff0c25c52
commit 7d6c0aaf11
10 changed files with 255 additions and 8 deletions

View file

@@ -12,6 +12,22 @@ from .fixtures.test_cases import basic_test_cases, image_test_cases, multi_turn_
from .streaming_assertions import StreamingValidator
def provider_from_model(responses_client, text_model_id):
    """Return the provider record that serves ``text_model_id``.

    The model is looked up first by its client-visible ``id`` and, as a
    fallback, by the provider-side resource id stored in ``custom_metadata``.

    Raises:
        KeyError: if the model id is unknown, or its provider is not listed.
    """
    # Fetch the model list once: the previous implementation issued the
    # (potentially remote) models.list() call twice for the same data.
    all_models = responses_client.models.list()
    models = {m.id: m for m in all_models}
    models.update({m.custom_metadata["provider_resource_id"]: m for m in all_models if m.custom_metadata})
    provider_id = models[text_model_id].custom_metadata["provider_id"]
    providers = {p.provider_id: p for p in responses_client.providers.list()}
    return providers[provider_id]
def skip_if_chat_completions_logprobs_not_supported(responses_client, text_model_id):
    """Skip the running test when the model's provider lacks logprobs support."""
    # Providers known to not return logprobs from /v1/chat/completions.
    unsupported = ("remote::ollama",)
    provider_type = provider_from_model(responses_client, text_model_id).provider_type
    if provider_type in unsupported:
        pytest.skip(f"Model {text_model_id} hosted by {provider_type} doesn't support /v1/chat/completions logprobs.")
@pytest.mark.parametrize("case", basic_test_cases)
def test_response_non_streaming_basic(responses_client, text_model_id, case):
response = responses_client.responses.create(
@@ -206,3 +222,153 @@ def test_response_non_streaming_multi_turn_image(responses_client, text_model_id
previous_response_id = response.id
output_text = response.output_text.lower()
assert turn_expected.lower() in output_text
def test_include_logprobs_non_streaming(client_with_models, text_model_id):
    """Test logprobs inclusion in responses with the include parameter."""
    skip_if_chat_completions_logprobs_not_supported(client_with_models, text_model_id)

    prompt = "Which planet do humans live on?"

    def only_message(response):
        # Each response should contain exactly one assistant message item.
        assert len(response.output) == 1
        messages = [item for item in response.output if item.type == "message"]
        assert len(messages) == 1, f"Expected one message output, got {len(messages)}"
        return messages[0]

    # Baseline: without the include flag, logprobs must be absent.
    baseline = client_with_models.responses.create(
        model=text_model_id,
        input=prompt,
        stream=False,
    )
    assert only_message(baseline).content[0].logprobs is None, "Expected no logprobs in the returned response"

    # With include=["message.output_text.logprobs"], logprobs must be present.
    with_logprobs = client_with_models.responses.create(
        model=text_model_id,
        input=prompt,
        stream=False,
        include=["message.output_text.logprobs"],
    )
    assert only_message(with_logprobs).content[0].logprobs is not None, (
        "Expected logprobs in the returned response, but none were returned"
    )
def test_include_logprobs_streaming(client_with_models, text_model_id):
    """Test logprobs inclusion in responses with the include parameter."""
    skip_if_chat_completions_logprobs_not_supported(client_with_models, text_model_id)

    # Stream a response that asks for per-token logprobs on the output text.
    stream = client_with_models.responses.create(
        model=text_model_id,
        input="Which planet do humans live on?",
        stream=True,
        include=["message.output_text.logprobs"],
    )

    # NOTE: the `{chunk.type=}` f-strings embed the variable name, so `chunk`
    # keeps its name to preserve the exact assertion messages.
    for chunk in stream:
        kind = chunk.type
        if kind in ("response.output_text.delta", "response.output_text.done"):
            assert chunk.logprobs is not None, (
                f"Expected logprobs in the returned chunk ({chunk.type=}), but none were returned"
            )
        elif kind == "response.output_item.done":
            parts = chunk.item.content
            assert len(parts) == 1, f"Expected one content object, got {len(parts)}"
            assert parts[0].logprobs is not None, (
                f"Expected logprobs in the returned chunk ({chunk.type=}), but none were returned"
            )
        elif kind == "response.content_part.done":
            # Content parts never carry logprobs; they are attached to the
            # final message content instead.
            assert chunk.part.logprobs is None, f"Expected no logprobs in the returned chunk ({chunk.type=})"
        elif kind == "response.completed":
            messages = [item for item in chunk.response.output if item.type == "message"]
            assert len(messages) == 1, f"Expected one message output, got {len(messages)}"
            assert messages[0].content[0].logprobs is not None, (
                f"Expected logprobs in the returned chunk ({chunk.type=}), but none were returned"
            )
def test_include_logprobs_with_web_search(client_with_models, text_model_id):
    """Test include logprobs with built-in tool."""
    skip_if_chat_completions_logprobs_not_supported(client_with_models, text_model_id)

    # Ask for logprobs while also triggering the built-in web_search tool.
    response = client_with_models.responses.create(
        model=text_model_id,
        input="Search for a positive news story from today.",
        stream=False,
        include=["message.output_text.logprobs"],
        tools=[{"type": "web_search"}],
    )

    # The tool call comes first and must have completed.
    assert len(response.output) >= 2
    tool_call = response.output[0]
    assert tool_call.type == "web_search_call"
    assert tool_call.status == "completed"

    # Exactly one assistant message follows, and it carries logprobs.
    messages = [item for item in response.output if item.type == "message"]
    assert len(messages) == 1, f"Expected one message output, got {len(messages)}"
    assert messages[0].content[0].logprobs is not None, (
        "Expected logprobs in the returned response, but none were returned"
    )
def test_include_logprobs_with_function_tools(client_with_models, text_model_id):
    """Test include logprobs with function tools."""
    skip_if_chat_completions_logprobs_not_supported(client_with_models, text_model_id)

    weather_tool = {
        "type": "function",
        "name": "get_weather",
        "description": "Get weather information for a specified location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city name (e.g., 'New York', 'London')",
                },
            },
        },
    }

    # Request logprobs alongside a function tool the model should call.
    response = client_with_models.responses.create(
        model=text_model_id,
        input="What is the weather in Paris?",
        stream=False,
        include=["message.output_text.logprobs"],
        tools=[weather_tool],
    )

    # The model should emit a single completed function call and no message,
    # so there is no output text for logprobs to attach to.
    assert len(response.output) == 1
    call = response.output[0]
    assert call.type == "function_call"
    assert call.name == "get_weather"
    assert call.status == "completed"
    messages = [item for item in response.output if item.type == "message"]
    assert len(messages) == 0, f"Expected no message output, got {len(messages)}"