mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-25 09:05:37 +00:00 
			
		
		
		
	This PR updates the Conversation item related types and improves a couple of critical parts of the implementation: - it creates a streaming output item for the final assistant message output by the model. Until now we only added content parts and included that message in the final response. - it rewrites the conversation update code completely to account for items other than messages (tool calls, outputs, etc.) ## Test Plan Used the test script from https://github.com/llamastack/llama-stack-client-python/pull/281 for this ``` TEST_API_BASE_URL=http://localhost:8321/v1 \ pytest tests/integration/test_agent_turn_step_events.py::test_client_side_function_tool -xvs ```
		
			
				
	
	
		
			503 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			JSON
		
	
	
	
	
		
			Generated
		
	
	
			
		
		
	
	
			503 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			JSON
		
	
	
	
	
		
			Generated
		
	
	
| {
 | |
|   "test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_web_search[client_with_models-txt=openai/gpt-4o-llama_experts]",
 | |
|   "request": {
 | |
|     "method": "POST",
 | |
|     "url": "https://api.openai.com/v1/v1/chat/completions",
 | |
|     "headers": {},
 | |
|     "body": {
 | |
|       "model": "gpt-4o",
 | |
|       "messages": [
 | |
|         {
 | |
|           "role": "user",
 | |
|           "content": "How many experts does the Llama 4 Maverick model have?"
 | |
|         },
 | |
|         {
 | |
|           "role": "assistant",
 | |
|           "content": "",
 | |
|           "tool_calls": [
 | |
|             {
 | |
|               "index": 0,
 | |
|               "id": "call_kyXmbn7mAgkmCuj7wN4zQ1Wx",
 | |
|               "type": "function",
 | |
|               "function": {
 | |
|                 "name": "web_search",
 | |
|                 "arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
 | |
|               }
 | |
|             }
 | |
|           ]
 | |
|         },
 | |
|         {
 | |
|           "role": "tool",
 | |
|           "tool_call_id": "call_kyXmbn7mAgkmCuj7wN4zQ1Wx",
 | |
|           "content": "{\"query\": \"Llama 4 Maverick model number of experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. * For multimodal applications, this model supports up to 5 image inputs create(  model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\",   messages =[  {  \\\"role\\\":  \\\"user\\\",   \\\"content\\\":  \\\"Explain why fast inference is critical for reasoning models\\\"   }   ]  )  print(completion.\", \"score\": 0.9287263, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Model developer: Meta. Model Architecture: The\", \"score\": 0.9183121, \"raw_content\": null}, {\"url\": \"https://build.nvidia.com/meta/llama-4-maverick-17b-128e-instruct/modelcard\", \"title\": \"llama-4-maverick-17b-128e-instruct Model by Meta\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Third-Party Community Consideration. 
This model\", \"score\": 0.91399205, \"raw_content\": null}, {\"url\": \"https://replicate.com/meta/llama-4-maverick-instruct\", \"title\": \"meta/llama-4-maverick-instruct | Run with an API on ...\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. All services are online \\u00b7 Home \\u00b7 About \\u00b7 Changelog\", \"score\": 0.9073207, \"raw_content\": null}, {\"url\": \"https://openrouter.ai/meta-llama/llama-4-maverick\", \"title\": \"Llama 4 Maverick - API, Providers, Stats\", \"content\": \"# Meta: Llama 4 Maverick ### meta-llama/llama-4-maverick Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput. Llama 4 Maverick - API, Providers, Stats | OpenRouter ## Providers for Llama 4 Maverick ## Performance for Llama 4 Maverick ## Apps using Llama 4 Maverick ## Recent activity on Llama 4 Maverick ## Uptime stats for Llama 4 Maverick ## Sample code and API for Llama 4 Maverick\", \"score\": 0.8958969, \"raw_content\": null}]}"
 | |
|         }
 | |
|       ],
 | |
|       "stream": true,
 | |
|       "stream_options": {
 | |
|         "include_usage": true
 | |
|       },
 | |
|       "tools": [
 | |
|         {
 | |
|           "type": "function",
 | |
|           "function": {
 | |
|             "name": "web_search",
 | |
|             "description": "Search the web for information",
 | |
|             "parameters": {
 | |
|               "type": "object",
 | |
|               "properties": {
 | |
|                 "query": {
 | |
|                   "type": "string",
 | |
|                   "description": "The query to search for"
 | |
|                 }
 | |
|               },
 | |
|               "required": [
 | |
|                 "query"
 | |
|               ]
 | |
|             }
 | |
|           }
 | |
|         }
 | |
|       ]
 | |
|     },
 | |
|     "endpoint": "/v1/chat/completions",
 | |
|     "model": "gpt-4o"
 | |
|   },
 | |
|   "response": {
 | |
|     "body": [
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": "",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": "assistant",
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "yLGy9MPvGaT3lO"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": "The",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "jUGNatdHii0ur"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": " L",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "z1Jy1yQr9MLb0J"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": "lama",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "9JDP9FpEugAO"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": " ",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "trKSUgcuaQTm7rO"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": "4",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "S9oWlTou49qwy61"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": " Maver",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "mdn9lEHn4i"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": "ick",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "tR60XL85doJ4u"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": " model",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "QFWYthsV50"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": " has",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "ubPYxi5GV3PF"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": " ",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "JYlHYkAeWG43lud"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": "128",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "8PG9wCjDGS7kB"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": " experts",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "dXqNhIUO"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": ".",
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": null,
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "rvMyY7JQDhoDG5b"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [
 | |
|             {
 | |
|               "delta": {
 | |
|                 "content": null,
 | |
|                 "function_call": null,
 | |
|                 "refusal": null,
 | |
|                 "role": null,
 | |
|                 "tool_calls": null
 | |
|               },
 | |
|               "finish_reason": "stop",
 | |
|               "index": 0,
 | |
|               "logprobs": null
 | |
|             }
 | |
|           ],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": null,
 | |
|           "obfuscation": "OQSxN17glN"
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
 | |
|         "__data__": {
 | |
|           "id": "rec-639c49548ecb",
 | |
|           "choices": [],
 | |
|           "created": 0,
 | |
|           "model": "gpt-4o-2024-08-06",
 | |
|           "object": "chat.completion.chunk",
 | |
|           "service_tier": "default",
 | |
|           "system_fingerprint": "fp_cbf1785567",
 | |
|           "usage": {
 | |
|             "completion_tokens": 14,
 | |
|             "prompt_tokens": 986,
 | |
|             "total_tokens": 1000,
 | |
|             "completion_tokens_details": {
 | |
|               "accepted_prediction_tokens": 0,
 | |
|               "audio_tokens": 0,
 | |
|               "reasoning_tokens": 0,
 | |
|               "rejected_prediction_tokens": 0
 | |
|             },
 | |
|             "prompt_tokens_details": {
 | |
|               "audio_tokens": 0,
 | |
|               "cached_tokens": 0
 | |
|             }
 | |
|           },
 | |
|           "obfuscation": "Uf4PLyBNwljP"
 | |
|         }
 | |
|       }
 | |
|     ],
 | |
|     "is_streaming": true
 | |
|   },
 | |
|   "id_normalization_mapping": {}
 | |
| }
 |