mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
fix: don't pass default response format in Responses (#3614)
# What does this PR do? Fireworks doesn't allow response_format with tool use. The default response format is 'text' anyway, so we can safely omit it. ## Test Plan The script below failed without the change and runs successfully after it. ``` #!/usr/bin/env python3 """ Script to test Responses API with kubernetes-mcp-server. This script: 1. Connects to the llama stack server 2. Uses the Responses API with MCP tools 3. Asks for the list of Kubernetes namespaces using the kubernetes-mcp-server """ import json from openai import OpenAI # Connect to the llama stack server base_url = "http://localhost:8321/v1" client = OpenAI(base_url=base_url, api_key="fake") # Define the MCP tool pointing to the kubernetes-mcp-server # The kubernetes-mcp-server is running on port 3000 with SSE endpoint at /sse mcp_server_url = "http://localhost:3000/sse" tools = [ { "type": "mcp", "server_label": "k8s", "server_url": mcp_server_url, } ] # Create a response request asking for k8s namespaces print("Sending request to list Kubernetes namespaces...") print(f"Using MCP server at: {mcp_server_url}") print("Available tools will be listed automatically by the MCP server.") print() response = client.responses.create( # model="meta-llama/Llama-3.2-3B-Instruct", # Using the vllm model model="fireworks/accounts/fireworks/models/llama4-scout-instruct-basic", # model="openai/gpt-4o", input="what are all the Kubernetes namespaces? Use tool call to `namespaces_list`. 
make sure to adhere to the tool calling format UNDER ALL CIRCUMSTANCES.", tools=tools, stream=False, ) print("\n" + "=" * 80) print("RESPONSE OUTPUT:") print("=" * 80) # Print the output for i, output in enumerate(response.output): print(f"\n[Output {i + 1}] Type: {output.type}") if output.type == "mcp_list_tools": print(f" Server: {output.server_label}") print(f" Tools available: {[t.name for t in output.tools]}") elif output.type == "mcp_call": print(f" Tool called: {output.name}") print(f" Arguments: {output.arguments}") print(f" Result: {output.output}") if output.error: print(f" Error: {output.error}") elif output.type == "message": print(f" Role: {output.role}") print(f" Content: {output.content}") print("\n" + "=" * 80) print("FINAL RESPONSE TEXT:") print("=" * 80) print(response.output_text) ```
This commit is contained in:
parent
d350e3662b
commit
ac7c35fbe6
10 changed files with 7573 additions and 89 deletions
|
@ -37,7 +37,6 @@ from llama_stack.apis.inference import (
|
|||
OpenAIJSONSchema,
|
||||
OpenAIResponseFormatJSONObject,
|
||||
OpenAIResponseFormatJSONSchema,
|
||||
OpenAIResponseFormatText,
|
||||
OpenAIUserMessageParam,
|
||||
)
|
||||
from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
|
||||
|
@ -148,7 +147,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
|
|||
mock_inference_api.openai_chat_completion.assert_called_once_with(
|
||||
model=model,
|
||||
messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)],
|
||||
response_format=OpenAIResponseFormatText(),
|
||||
response_format=None,
|
||||
tools=None,
|
||||
stream=True,
|
||||
temperature=0.1,
|
||||
|
@ -823,16 +822,16 @@ async def test_store_response_uses_rehydrated_input_with_previous_response(
|
|||
@pytest.mark.parametrize(
|
||||
"text_format, response_format",
|
||||
[
|
||||
(OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), OpenAIResponseFormatText()),
|
||||
(OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), None),
|
||||
(
|
||||
OpenAIResponseText(format=OpenAIResponseTextFormat(name="Test", schema={"foo": "bar"}, type="json_schema")),
|
||||
OpenAIResponseFormatJSONSchema(json_schema=OpenAIJSONSchema(name="Test", schema={"foo": "bar"})),
|
||||
),
|
||||
(OpenAIResponseText(format=OpenAIResponseTextFormat(type="json_object")), OpenAIResponseFormatJSONObject()),
|
||||
# ensure text param with no format specified defaults to text
|
||||
(OpenAIResponseText(format=None), OpenAIResponseFormatText()),
|
||||
# ensure text param of None defaults to text
|
||||
(None, OpenAIResponseFormatText()),
|
||||
# ensure text param with no format specified defaults to None
|
||||
(OpenAIResponseText(format=None), None),
|
||||
# ensure text param of None defaults to None
|
||||
(None, None),
|
||||
],
|
||||
)
|
||||
async def test_create_openai_response_with_text_format(
|
||||
|
@ -855,7 +854,6 @@ async def test_create_openai_response_with_text_format(
|
|||
# Verify
|
||||
first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
|
||||
assert first_call.kwargs["messages"][0].content == input_text
|
||||
assert first_call.kwargs["response_format"] is not None
|
||||
assert first_call.kwargs["response_format"] == response_format
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue