llama-stack-mirror/tests/integration/recordings/responses/models-bd032f995f2a-7467c0cf.json
ehhuang ac7c35fbe6
fix: don't pass default response format in Responses (#3614)
# What does this PR do?
Fireworks doesn't allow repsonse_format with tool use. The default
response format is 'text' anyway, so we can safely omit.


## Test Plan
Below script failed without the change, runs after.

```
#!/usr/bin/env python3
"""
Script to test Responses API with kubernetes-mcp-server.

This script:
1. Connects to the llama stack server
2. Uses the Responses API with MCP tools
3. Asks for the list of Kubernetes namespaces using the kubernetes-mcp-server
"""

import json

from openai import OpenAI

# Connect to the llama stack server
base_url = "http://localhost:8321/v1"
client = OpenAI(base_url=base_url, api_key="fake")

# Define the MCP tool pointing to the kubernetes-mcp-server
# The kubernetes-mcp-server is running on port 3000 with SSE endpoint at /sse
mcp_server_url = "http://localhost:3000/sse"

tools = [
    {
        "type": "mcp",
        "server_label": "k8s",
        "server_url": mcp_server_url,
    }
]

# Create a response request asking for k8s namespaces
print("Sending request to list Kubernetes namespaces...")
print(f"Using MCP server at: {mcp_server_url}")
print("Available tools will be listed automatically by the MCP server.")
print()

response = client.responses.create(
    # model="meta-llama/Llama-3.2-3B-Instruct",  # Using the vllm model
    model="fireworks/accounts/fireworks/models/llama4-scout-instruct-basic",
    # model="openai/gpt-4o",
    input="what are all the Kubernetes namespaces? Use tool call to `namespaces_list`. make sure to adhere to the tool calling format UNDER ALL CIRCUMSTANCES.",
    tools=tools,
    stream=False,
)

print("\n" + "=" * 80)
print("RESPONSE OUTPUT:")
print("=" * 80)

# Print the output
for i, output in enumerate(response.output):
    print(f"\n[Output {i + 1}] Type: {output.type}")
    if output.type == "mcp_list_tools":
        print(f"  Server: {output.server_label}")
        print(f"  Tools available: {[t.name for t in output.tools]}")
    elif output.type == "mcp_call":
        print(f"  Tool called: {output.name}")
        print(f"  Arguments: {output.arguments}")
        print(f"  Result: {output.output}")
        if output.error:
            print(f"  Error: {output.error}")
    elif output.type == "message":
        print(f"  Role: {output.role}")
        print(f"  Content: {output.content}")

print("\n" + "=" * 80)
print("FINAL RESPONSE TEXT:")
print("=" * 80)
print(response.output_text)
```
2025-09-30 14:52:24 -07:00

69 lines
1.6 KiB
JSON

{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "nomic-embed-text:latest",
"created": 1754610899,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama-guard3:1b",
"created": 1754088388,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:l6-v2",
"created": 1753826826,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:latest",
"created": 1749064003,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.1:8b-instruct-fp16",
"created": 1739575404,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b-instruct-fp16",
"created": 1737496003,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
}
}