mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-15 14:43:48 +00:00
Renames `inference_recorder.py` to `api_recorder.py` and extends it to support recording/replaying tool invocations in addition to inference calls. This allows us to record web-search, etc. tool calls and thereafter apply recordings for `tests/integration/responses` ## Test Plan ``` export OPENAI_API_KEY=... export TAVILY_SEARCH_API_KEY=... ./scripts/integration-tests.sh --stack-config ci-tests \ --suite responses --inference-mode record-if-missing ```
250 lines
10 KiB
JSON
250 lines
10 KiB
JSON
{
|
|
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[openai_client-txt=openai/gpt-4o:emb=openai/text-embedding-3-small:dim=1536-llama_experts_pdf]",
|
|
"request": {
|
|
"method": "POST",
|
|
"url": "https://api.openai.com/v1/v1/chat/completions",
|
|
"headers": {},
|
|
"body": {
|
|
"model": "gpt-4o",
|
|
"messages": [
|
|
{
|
|
"role": "user",
|
|
"content": "How many experts does the Llama 4 Maverick model have?"
|
|
},
|
|
{
|
|
"role": "assistant",
|
|
"content": "",
|
|
"tool_calls": [
|
|
{
|
|
"index": 0,
|
|
"id": "call_zKJ7WgvCO3tx6yB0We1qI0d8",
|
|
"type": "function",
|
|
"function": {
|
|
"name": "knowledge_search",
|
|
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"role": "tool",
|
|
"tool_call_id": "call_zKJ7WgvCO3tx6yB0We1qI0d8",
|
|
"content": [
|
|
{
|
|
"type": "text",
|
|
"text": "knowledge_search tool found 2 chunks:\nBEGIN of knowledge_search tool results.\n"
|
|
},
|
|
{
|
|
"type": "text",
|
|
"text": "[1] document_id: file-728796232029, score: 1.4945131220963286, attributes: {'filename': 'llama_stack_and_models.pdf', 'document_id': 'file-728796232029', 'token_count': 98.0, 'metadata_token_count': 11.0} (cite as <|file-728796232029|>)\n, \nhardware\n \nvendors,\n \nand\n \nAI-focused\n \ncompanies)\n \nthat\n \noffer\n \ntailored\n \ninfrastructure,\n \nsoftware,\n \nand\n \nservices\n \nfor\n \ndeploying\n \nLlama\n \nmodels.\n \nLlama 4 Maverick \n Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts. \n"
|
|
},
|
|
{
|
|
"type": "text",
|
|
"text": "[2] document_id: file-728796232029, score: 1.1415676746925796, attributes: {'filename': 'llama_stack_and_models.pdf', 'document_id': 'file-728796232029', 'token_count': 498.0, 'metadata_token_count': 11.0} (cite as <|file-728796232029|>)\nLlama Stack \nLlama Stack Overview \nLlama Stack standardizes the core building blocks that simplify AI application development. It codifies best \npractices\n \nacross\n \nthe\n \nLlama\n \necosystem.\n \nMore\n \nspecifically,\n \nit\n \nprovides\n \u25cf Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry. \u25cf Plugin architecture to support the rich ecosystem of different API implementations in various \nenvironments,\n \nincluding\n \nlocal\n \ndevelopment,\n \non-premises,\n \ncloud,\n \nand\n \nmobile.\n \u25cf Prepackaged verified distributions which offer a one-stop solution for developers to get started quickly \nand\n \nreliably\n \nin\n \nany\n \nenvironment.\n \u25cf Multiple developer interfaces like CLI and SDKs for Python, Typescript, iOS, and Android. \u25cf Standalone applications as examples for how to build production-grade AI applications with Llama \nStack.\n \nLlama Stack Benefits \n\u25cf Flexible Options: Developers can choose their preferred infrastructure without changing APIs and enjoy \nflexible\n \ndeployment\n \nchoices.\n \u25cf Consistent Experience: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI \napplications\n \nwith\n \nconsistent\n \napplication\n \nbehavior.\n \u25cf Robust Ecosystem: Llama Stack is already integrated with distribution partners (cloud providers, \nhardware\n \nvendors,\n \nand\n \nAI-focused\n \ncompanies)\n \nthat\n \noffer\n \ntailored\n \ninfrastructure,\n \nsoftware,\n \nand\n \nservices\n \nfor\n \ndeploying\n \nLlama\n \nmodels.\n \nLlama 4 Maverick \n Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts. \n"
|
|
},
|
|
{
|
|
"type": "text",
|
|
"text": "END of knowledge_search tool results.\n"
|
|
},
|
|
{
|
|
"type": "text",
|
|
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"stream": true,
|
|
"tools": [
|
|
{
|
|
"type": "function",
|
|
"function": {
|
|
"name": "knowledge_search",
|
|
"description": "Search for information in a database.",
|
|
"parameters": {
|
|
"type": "object",
|
|
"properties": {
|
|
"query": {
|
|
"type": "string",
|
|
"description": "The query to search for. Can be a natural language sentence or keywords."
|
|
}
|
|
},
|
|
"required": [
|
|
"query"
|
|
]
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
"endpoint": "/v1/chat/completions",
|
|
"model": "gpt-4o"
|
|
},
|
|
"response": {
|
|
"body": [
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "rec-c4915bdee5a9",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": "",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": "assistant",
|
|
"tool_calls": null
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null
|
|
}
|
|
],
|
|
"created": 0,
|
|
"model": "gpt-4o-2024-08-06",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": "default",
|
|
"system_fingerprint": "fp_f64f290af2",
|
|
"usage": null,
|
|
"obfuscation": "P4gkicgUKax"
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "rec-c4915bdee5a9",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": "The Llama ",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": null,
|
|
"tool_calls": null
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null
|
|
}
|
|
],
|
|
"created": 0,
|
|
"model": "gpt-4o-2024-08-06",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": "default",
|
|
"system_fingerprint": "fp_f64f290af2",
|
|
"usage": null,
|
|
"obfuscation": "PIPk8kAyow"
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "rec-c4915bdee5a9",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": "4 Maverick model is a Mixture-of-Experts (MoE)",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": null,
|
|
"tool_calls": null
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null
|
|
}
|
|
],
|
|
"created": 0,
|
|
"model": "gpt-4o-2024-08-06",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": "default",
|
|
"system_fingerprint": "fp_f64f290af2",
|
|
"usage": null,
|
|
"obfuscation": "viELmOBhSxTU"
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "rec-c4915bdee5a9",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": " model with ",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": null,
|
|
"tool_calls": null
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null
|
|
}
|
|
],
|
|
"created": 0,
|
|
"model": "gpt-4o-2024-08-06",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": "default",
|
|
"system_fingerprint": "fp_f64f290af2",
|
|
"usage": null,
|
|
"obfuscation": "M866QoC"
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "rec-c4915bdee5a9",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": "128 experts <|file-728796232029|>.",
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": null,
|
|
"tool_calls": null
|
|
},
|
|
"finish_reason": null,
|
|
"index": 0,
|
|
"logprobs": null
|
|
}
|
|
],
|
|
"created": 0,
|
|
"model": "gpt-4o-2024-08-06",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": "default",
|
|
"system_fingerprint": "fp_f64f290af2",
|
|
"usage": null,
|
|
"obfuscation": "s6h8QUwLvD"
|
|
}
|
|
},
|
|
{
|
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
|
"__data__": {
|
|
"id": "rec-c4915bdee5a9",
|
|
"choices": [
|
|
{
|
|
"delta": {
|
|
"content": null,
|
|
"function_call": null,
|
|
"refusal": null,
|
|
"role": null,
|
|
"tool_calls": null
|
|
},
|
|
"finish_reason": "stop",
|
|
"index": 0,
|
|
"logprobs": null
|
|
}
|
|
],
|
|
"created": 0,
|
|
"model": "gpt-4o-2024-08-06",
|
|
"object": "chat.completion.chunk",
|
|
"service_tier": "default",
|
|
"system_fingerprint": "fp_f64f290af2",
|
|
"usage": null,
|
|
"obfuscation": "ibcjGHS"
|
|
}
|
|
}
|
|
],
|
|
"is_streaming": true
|
|
},
|
|
"id_normalization_mapping": {}
|
|
}
|