llama-stack-mirror/tests/integration/responses/recordings/639c49548ecb34e8dedcc65738498e22c6d0dc67fbf9cb852f4a99f653eab6f3.json
Ashwin Bharambe e9b4278a51
feat(responses)!: improve responses + conversations implementations (#3810)
This PR updates the Conversation item related types and improves a
couple critical parts of the implemenation:

- it creates a streaming output item for the final assistant message
output by
  the model. until now we only added content parts and included that
  message in the final response.

- rewrites the conversation update code completely to account for items
  other than messages (tool calls, outputs, etc.)

## Test Plan

Used the test script from
https://github.com/llamastack/llama-stack-client-python/pull/281 for
this

```
TEST_API_BASE_URL=http://localhost:8321/v1 \
  pytest tests/integration/test_agent_turn_step_events.py::test_client_side_function_tool -xvs
```
2025-10-15 09:36:11 -07:00

503 lines
18 KiB
JSON

{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_web_search[client_with_models-txt=openai/gpt-4o-llama_experts]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_kyXmbn7mAgkmCuj7wN4zQ1Wx",
"type": "function",
"function": {
"name": "web_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_kyXmbn7mAgkmCuj7wN4zQ1Wx",
"content": "{\"query\": \"Llama 4 Maverick model number of experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. * For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9287263, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Model developer: Meta. Model Architecture: The\", \"score\": 0.9183121, \"raw_content\": null}, {\"url\": \"https://build.nvidia.com/meta/llama-4-maverick-17b-128e-instruct/modelcard\", \"title\": \"llama-4-maverick-17b-128e-instruct Model by Meta\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Third-Party Community Consideration. This model\", \"score\": 0.91399205, \"raw_content\": null}, {\"url\": \"https://replicate.com/meta/llama-4-maverick-instruct\", \"title\": \"meta/llama-4-maverick-instruct | Run with an API on ...\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. All services are online \\u00b7 Home \\u00b7 About \\u00b7 Changelog\", \"score\": 0.9073207, \"raw_content\": null}, {\"url\": \"https://openrouter.ai/meta-llama/llama-4-maverick\", \"title\": \"Llama 4 Maverick - API, Providers, Stats\", \"content\": \"# Meta: Llama 4 Maverick ### meta-llama/llama-4-maverick Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput. Llama 4 Maverick - API, Providers, Stats | OpenRouter ## Providers for Llama 4 Maverick ## Performance for Llama 4 Maverick ## Apps using Llama 4 Maverick ## Recent activity on Llama 4 Maverick ## Uptime stats for Llama 4 Maverick ## Sample code and API for Llama 4 Maverick\", \"score\": 0.8958969, \"raw_content\": null}]}"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "web_search",
"description": "Search the web for information",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for"
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "yLGy9MPvGaT3lO"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "jUGNatdHii0ur"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "z1Jy1yQr9MLb0J"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "9JDP9FpEugAO"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "trKSUgcuaQTm7rO"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "S9oWlTou49qwy61"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "mdn9lEHn4i"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "tR60XL85doJ4u"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "QFWYthsV50"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "ubPYxi5GV3PF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "JYlHYkAeWG43lud"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "8PG9wCjDGS7kB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "dXqNhIUO"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "rvMyY7JQDhoDG5b"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": null,
"obfuscation": "OQSxN17glN"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-639c49548ecb",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_cbf1785567",
"usage": {
"completion_tokens": 14,
"prompt_tokens": 986,
"total_tokens": 1000,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "Uf4PLyBNwljP"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}