OpenAI Responses - move tests under tests/verifications

This moves the OpenAI Responses API tests under
tests/verifications/openai_api/test_response.py and starts to wire
them up to our verification suite, so that we can test multiple
providers as well as OpenAI directly for the Responses API.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning 2025-04-18 15:26:34 -04:00 committed by Ashwin Bharambe
parent 591e6a3972
commit 207224a811
14 changed files with 353 additions and 273 deletions

View file

@ -75,11 +75,27 @@ class OpenAIResponseObject(BaseModel):
@json_schema_type
class OpenAIResponseObjectStream(BaseModel):
class OpenAIResponseObjectStreamResponseCreated(BaseModel):
response: OpenAIResponseObject
type: Literal["response.created"] = "response.created"
# Stream event model that carries a response object and is tagged with the
# literal type "response.completed".
@json_schema_type
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
response: OpenAIResponseObject
# Fixed tag value; serves as the discriminator in the OpenAIResponseObjectStream union.
type: Literal["response.completed"] = "response.completed"
# Union of the stream event models. Field(discriminator="type") makes the
# literal `type` value of each member the key used to select the concrete model.
OpenAIResponseObjectStream = Annotated[
Union[
OpenAIResponseObjectStreamResponseCreated,
OpenAIResponseObjectStreamResponseCompleted,
],
Field(discriminator="type"),
]
# Register the annotated union under an explicit schema name.
# NOTE(review): presumably required because an Annotated alias cannot be
# decorated with @json_schema_type — confirm against register_schema.
register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
@json_schema_type
class OpenAIResponseInputMessageContentText(BaseModel):
text: str
@ -112,6 +128,7 @@ class OpenAIResponseInputMessage(BaseModel):
# Input tool definition for web search. Documented with comments rather than a
# class docstring so the generated JSON schema description is left unchanged.
@json_schema_type
class OpenAIResponseInputToolWebSearch(BaseModel):
    # Tool-type tag; either listed identifier is accepted, defaulting to "web_search".
    type: Literal["web_search", "web_search_preview_2025_03_11"] = "web_search"
    # TODO: actually use search_context_size somewhere...
    # The alternation is grouped so that ^ and $ anchor every option. Ungrouped,
    # "^low|medium|high$" anchors only "low" at the start and "high" at the end,
    # so values such as "lowest" or "xmediumx" would pass validation.
    search_context_size: Optional[str] = Field(default="medium", pattern="^(low|medium|high)$")
    # TODO: add user_location

View file

@ -33,6 +33,8 @@ from llama_stack.apis.openai_responses.openai_responses import (
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponseObjectStream,
OpenAIResponseObjectStreamResponseCompleted,
OpenAIResponseObjectStreamResponseCreated,
OpenAIResponseOutput,
OpenAIResponseOutputMessage,
OpenAIResponseOutputMessageContentOutputText,
@ -174,7 +176,8 @@ class OpenAIResponsesImpl(OpenAIResponses):
for chunk_choice in chunk.choices:
# TODO: this only works for text content
chat_response_content.append(chunk_choice.delta.content or "")
chunk_finish_reason = chunk_choice.finish_reason
if chunk_choice.finish_reason:
chunk_finish_reason = chunk_choice.finish_reason
assistant_message = OpenAIAssistantMessageParam(content="".join(chat_response_content))
chat_response = OpenAIChatCompletion(
id=chat_response_id,
@ -219,7 +222,9 @@ class OpenAIResponsesImpl(OpenAIResponses):
if stream:
async def async_response() -> AsyncIterator[OpenAIResponseObjectStream]:
yield OpenAIResponseObjectStream(response=response)
# TODO: response created should actually get emitted much earlier in the process
yield OpenAIResponseObjectStreamResponseCreated(response=response)
yield OpenAIResponseObjectStreamResponseCompleted(response=response)
return async_response()
@ -270,40 +275,40 @@ class OpenAIResponsesImpl(OpenAIResponses):
# Add the assistant message with tool_calls response to the messages list
messages.append(choice.message)
# TODO: handle multiple tool calls
tool_call = choice.message.tool_calls[0]
tool_call_id = tool_call.id
function = tool_call.function
for tool_call in choice.message.tool_calls:
tool_call_id = tool_call.id
function = tool_call.function
# If for some reason the tool call doesn't have a function or id, we can't execute it
if not function or not tool_call_id:
return output_messages
# If for some reason the tool call doesn't have a function or id, we can't execute it
if not function or not tool_call_id:
continue
# TODO: telemetry spans for tool calls
result = await self._execute_tool_call(function)
# TODO: telemetry spans for tool calls
result = await self._execute_tool_call(function)
# Handle tool call failure
if not result:
output_messages.append(
OpenAIResponseOutputMessageWebSearchToolCall(
id=tool_call_id,
status="failed",
)
)
continue
# Handle tool call failure
if not result:
output_messages.append(
OpenAIResponseOutputMessageWebSearchToolCall(
id=tool_call_id,
status="failed",
)
status="completed",
),
)
return output_messages
output_messages.append(
OpenAIResponseOutputMessageWebSearchToolCall(
id=tool_call_id,
status="completed",
),
)
result_content = ""
# TODO: handle other result content types and lists
if isinstance(result.content, str):
result_content = result.content
messages.append(OpenAIToolMessageParam(content=result_content, tool_call_id=tool_call_id))
result_content = ""
# TODO: handle other result content types and lists
if isinstance(result.content, str):
result_content = result.content
messages.append(OpenAIToolMessageParam(content=result_content, tool_call_id=tool_call_id))
tool_results_chat_response = await self.inference_api.openai_chat_completion(
model=model_id,
messages=messages,