From cad646478f09ba115d53c42d3d6e4816e7965f36 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 27 May 2025 12:46:03 -0700 Subject: [PATCH] fixes, update test to be more robust --- .../agents/meta_reference/openai_responses.py | 40 ++++++++----------- .../fixtures/test_cases/responses.yaml | 9 ++--- 2 files changed, 20 insertions(+), 29 deletions(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py index dd3f2902a..5d5f9ef94 100644 --- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -263,12 +263,9 @@ class OpenAIResponsesImpl: chat_response: OpenAIChatCompletion, ctx: ChatCompletionContext, tools: list[OpenAIResponseInputTool] | None, - output_messages: list[OpenAIResponseOutput], ) -> list[OpenAIResponseOutput]: - """ - Handle tool execution and response message creation. - Returns: updated output_messages list - """ + """Handle tool execution and response message creation.""" + output_messages: list[OpenAIResponseOutput] = [] # Execute tool calls if any for choice in chat_response.choices: if choice.message.tool_calls and tools: @@ -362,6 +359,8 @@ class OpenAIResponsesImpl: temperature=temperature, ) + print(f"chat_tools: {chat_tools}") + print(f"messages: {messages}") inference_result = await self.inference_api.openai_chat_completion( model=model, messages=messages, @@ -404,11 +403,12 @@ class OpenAIResponsesImpl: chat_response = OpenAIChatCompletion(**inference_result.model_dump()) # Process response choices (tool execution and message creation) - output_messages = await self._process_response_choices( - chat_response=chat_response, - ctx=ctx, - tools=tools, - output_messages=output_messages, + output_messages.extend( + await self._process_response_choices( + chat_response=chat_response, + ctx=ctx, + tools=tools, + ) ) response = OpenAIResponseObject( @@ -525,11 +525,12 @@ class OpenAIResponsesImpl: ) # Process response choices (tool execution and message creation) - output_messages = await self._process_response_choices( - chat_response=chat_response_obj, - ctx=ctx, - tools=tools, - output_messages=output_messages, + output_messages.extend( + await self._process_response_choices( + chat_response=chat_response_obj, + ctx=ctx, + tools=tools, + ) ) # Create final response @@ -589,15 +590,6 @@ class OpenAIResponsesImpl: chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) elif input_tool.type == "web_search": tool_name = "web_search" - - # we need to list all the toolgroups so tools can be found. avoid MCPs because they - # may need authentication. - groups = await self.tool_groups_api.list_tool_groups() - for group in groups.data: - if group.mcp_endpoint: - continue - _ = await self.tool_groups_api.list_tools(group.identifier) - tool = await self.tool_groups_api.get_tool(tool_name) if not tool: raise ValueError(f"Tool {tool_name} not found") diff --git a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml index d8b8d40c5..51c7814a3 100644 --- a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml +++ b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml @@ -77,11 +77,12 @@ test_response_image: image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg" output: "llama" +# the models are really poor at tool calling after seeing images :/ test_response_multi_turn_image: test_name: test_response_multi_turn_image test_params: case: - - case_id: "llama_image_search" + - case_id: "llama_image_understanding" turns: - input: - role: user @@ -91,7 +92,5 @@ test_response_multi_turn_image: - type: input_image image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg" output: "llama" - - input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick', 'scout' and 'llm'" - tools: - - type: web_search - output: "model" + - input: "What country do you find this animal primarily in? What continent?" + output: "peru"