feat(tests): implement test isolation for inference recordings

Uses test_id in request hashes and test-scoped subdirectories to prevent cross-test contamination. Model list endpoints exclude test_id to enable merging recordings from different servers. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-14 12:12:45 +00:00 · 2025-10-03 20:05:24 -07:00 · 2025-10-03 20:05:24 -07:00 · 7008a70a6e
commit 7008a70a6e
parent f176196fba
303 changed files with 40627 additions and 105140 deletions
--- a/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json
+++ b/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json
--- a/tests/integration/agents/recordings/095b37e65a5a78904f225bdefc904d3e20145a4dab1be0cf07d17a416d85e58d.json
+++ b/tests/integration/agents/recordings/095b37e65a5a78904f225bdefc904d3e20145a4dab1be0cf07d17a416d85e58d.json
--- a/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json
+++ b/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json
--- a/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json
+++ b/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json
@ -0,0 +1,58 @@
+{
+  "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_none[ollama/llama3.2:3b-instruct-fp16]",
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama-guard3:1b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Task: Check if there is unsafe content in 'User' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST User message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
+        }
+      ],
+      "stream": false,
+      "temperature": 0.0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama-guard3:1b"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-1a0d3109cf92",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "safe",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 0,
+        "model": "llama-guard3:1b",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 2,
+          "prompt_tokens": 398,
+          "total_tokens": 400,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/1d82e9439ae3.json
+++ b/tests/integration/recordings/responses/1d82e9439ae3.json
--- a/tests/integration/agents/recordings/1fd05fad28c2771c56ac8462a742f6004a37724e9c44b32bfc7fef5ac44d744c.json
+++ b/tests/integration/agents/recordings/1fd05fad28c2771c56ac8462a742f6004a37724e9c44b32bfc7fef5ac44d744c.json
--- a/tests/integration/agents/recordings/28cc2057662b6d13ab1becc3f92e5afbdaa19c2192b588a5795a5fa4c09fb588.json
+++ b/tests/integration/agents/recordings/28cc2057662b6d13ab1becc3f92e5afbdaa19c2192b588a5795a5fa4c09fb588.json
--- a/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json
+++ b/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json
--- a/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json
+++ b/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json
--- a/tests/integration/recordings/responses/3a1a67912f65.json
+++ b/tests/integration/recordings/responses/3a1a67912f65.json
--- a/tests/integration/agents/recordings/3ef67b5ce7dbb36bf47c16b906e32b5d5f18f7e46b6425690d680b6536fffcf0.json
+++ b/tests/integration/agents/recordings/3ef67b5ce7dbb36bf47c16b906e32b5d5f18f7e46b6425690d680b6536fffcf0.json
--- a/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json
+++ b/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json
--- a/tests/integration/agents/recordings/45f7bb25cd8184f361ab72c0ca3f9391c966d802d60df60d5b5b7c8354d0da6d.json
+++ b/tests/integration/agents/recordings/45f7bb25cd8184f361ab72c0ca3f9391c966d802d60df60d5b5b7c8354d0da6d.json
--- a/tests/integration/agents/recordings/55c7250c01acd7f27b62fa966eae24af54de6d7e0e393918c58ac7215c60a29f.json
+++ b/tests/integration/agents/recordings/55c7250c01acd7f27b62fa966eae24af54de6d7e0e393918c58ac7215c60a29f.json
--- a/tests/integration/recordings/responses/5cc7605f9950.json
+++ b/tests/integration/recordings/responses/5cc7605f9950.json
--- a/tests/integration/agents/recordings/610695da59ffcce0a03a4d4b1ffbf61cf698f39fd41628173fb864e14b3a71c5.json
+++ b/tests/integration/agents/recordings/610695da59ffcce0a03a4d4b1ffbf61cf698f39fd41628173fb864e14b3a71c5.json
--- a/tests/integration/agents/recordings/6446f5838fb216dcc72e56d44ad62350d66e4f850bc15abb328f1278d1204723.json
+++ b/tests/integration/agents/recordings/6446f5838fb216dcc72e56d44ad62350d66e4f850bc15abb328f1278d1204723.json
--- a/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json
+++ b/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json
--- a/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json
+++ b/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json
--- a/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json
+++ b/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json
@ -0,0 +1,58 @@
+{
+  "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_required[ollama/llama3.2:3b-instruct-fp16]",
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama-guard3:1b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Task: Check if there is unsafe content in 'User' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST User message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
+        }
+      ],
+      "stream": false,
+      "temperature": 0.0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama-guard3:1b"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-7094319e0384",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "safe",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 0,
+        "model": "llama-guard3:1b",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 2,
+          "prompt_tokens": 398,
+          "total_tokens": 400,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json
+++ b/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json
--- a/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json
+++ b/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json
--- a/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json
+++ b/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json
--- a/tests/integration/agents/recordings/868db6e8a427d63dc71b749257aa40343a2876e37cc0f695fee5f1ae6e1d6ba7.json
+++ b/tests/integration/agents/recordings/868db6e8a427d63dc71b749257aa40343a2876e37cc0f695fee5f1ae6e1d6ba7.json
--- a/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json
+++ b/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json
--- a/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json
+++ b/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json
--- a/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json
+++ b/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json
@ -0,0 +1,58 @@
+{
+  "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_get_boiling_point[ollama/llama3.2:3b-instruct-fp16]",
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama-guard3:1b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
+        }
+      ],
+      "stream": false,
+      "temperature": 0.0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama-guard3:1b"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-901956b3a51b",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "safe",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 0,
+        "model": "llama-guard3:1b",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 2,
+          "prompt_tokens": 401,
+          "total_tokens": 403,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/agents/recordings/9354fb7f3064fe08a633f7e86444f54a53aaa490958df34f881f114c355d2dc2.json
+++ b/tests/integration/agents/recordings/9354fb7f3064fe08a633f7e86444f54a53aaa490958df34f881f114c355d2dc2.json
@ -0,0 +1,415 @@
+{
+  "test_id": "tests/integration/agents/test_agents.py::test_custom_tool[ollama/llama3.2:3b-instruct-fp16]",
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "system",
+          "content": "You are a helpful assistant"
+        },
+        {
+          "role": "user",
+          "content": "What is the boiling point of the liquid polyjuice in celsius?"
+        },
+        {
+          "role": "assistant",
+          "content": "",
+          "tool_calls": [
+            {
+              "id": "call_4p1xbexa",
+              "type": "function",
+              "function": {
+                "name": "get_boiling_point",
+                "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}"
+              }
+            }
+          ]
+        },
+        {
+          "role": "tool",
+          "tool_call_id": "call_4p1xbexa",
+          "content": "-100"
+        }
+      ],
+      "max_tokens": 512,
+      "stream": true,
+      "temperature": 0.0001,
+      "tool_choice": "auto",
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "get_boiling_point",
+            "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "liquid_name": {
+                  "type": "string",
+                  "description": "The name of the liquid"
+                },
+                "celcius": {
+                  "type": "boolean",
+                  "description": "Whether to return the boiling point in Celcius"
+                }
+              },
+              "required": [
+                "liquid_name"
+              ]
+            }
+          }
+        }
+      ],
+      "top_p": 0.9
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": "The",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": " boiling",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": " point",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": " of",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": " Poly",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": "ju",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": "ice",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": " is",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": " -",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": "100",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": "\u00b0C",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": ".",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-9354fb7f3064",
+          "choices": [
+            {
+              "delta": {
+                "content": "",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": "stop",
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      }
+    ],
+    "is_streaming": true
+  }
+}
--- a/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json
+++ b/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json
--- a/tests/integration/agents/recordings/961052707b2d39c56d3ac2ee7c71d67fffc2c1398c7d64d8e18a7b28135c42c3.json
+++ b/tests/integration/agents/recordings/961052707b2d39c56d3ac2ee7c71d67fffc2c1398c7d64d8e18a7b28135c42c3.json
--- a/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json
+++ b/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json
--- a/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json
+++ b/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json
@ -0,0 +1,58 @@
+{
+  "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_get_boiling_point[ollama/llama3.2:3b-instruct-fp16]",
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama-guard3:1b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Task: Check if there is unsafe content in 'User' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST User message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
+        }
+      ],
+      "stream": false,
+      "temperature": 0.0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama-guard3:1b"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-a702e4bf918e",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "safe",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 0,
+        "model": "llama-guard3:1b",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 2,
+          "prompt_tokens": 398,
+          "total_tokens": 400,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/accd741d4b74.json
+++ b/tests/integration/recordings/responses/accd741d4b74.json
--- a/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json
+++ b/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json
--- a/tests/integration/agents/recordings/ae230a6062d097d51a5a54cb41852f7f33f6b2c7a400039007c0822cac28d434.json
+++ b/tests/integration/agents/recordings/ae230a6062d097d51a5a54cb41852f7f33f6b2c7a400039007c0822cac28d434.json
--- a/tests/integration/agents/recordings/b62d824408e5d5f1e278fb6ed654b18a8c50eeed5cfdd125ec110752595649bc.json
+++ b/tests/integration/agents/recordings/b62d824408e5d5f1e278fb6ed654b18a8c50eeed5cfdd125ec110752595649bc.json
--- a/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json
+++ b/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json
--- a/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json
+++ b/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json
--- a/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json
+++ b/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json
--- a/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json
+++ b/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json
--- a/tests/integration/agents/recordings/da6ec32ed8fb5bf4d957058ddd391a9958c4cfd0ab301bbd7b466a889cfc631b.json
+++ b/tests/integration/agents/recordings/da6ec32ed8fb5bf4d957058ddd391a9958c4cfd0ab301bbd7b466a889cfc631b.json
--- a/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json
+++ b/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json
--- a/tests/integration/agents/recordings/df1ff58ea16f0cfb14c80acfee395b186a1feed8262ef40538f0a215954e4398.json
+++ b/tests/integration/agents/recordings/df1ff58ea16f0cfb14c80acfee395b186a1feed8262ef40538f0a215954e4398.json
--- a/tests/integration/agents/recordings/df8355d4d3267371f2a65159d80cfb4e34591e6222a5bc3a079c94a068bf8fd7.json
+++ b/tests/integration/agents/recordings/df8355d4d3267371f2a65159d80cfb4e34591e6222a5bc3a079c94a068bf8fd7.json
--- a/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json
+++ b/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json
--- a/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json
+++ b/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json
--- a/tests/integration/common/recordings/02c93bb3c314427bae2b7a7a6f054792b9f22d2cb4522eab802810be8672d3dc.json
+++ b/tests/integration/common/recordings/02c93bb3c314427bae2b7a7a6f054792b9f22d2cb4522eab802810be8672d3dc.json
@ -1,4 +1,5 @@
 {
+  "test_id": null,
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/ps",
--- a/tests/integration/common/recordings/1098240ef53bbd378adf8dafbd5838b16eef7d6a7d6e75d24e3c120e25e73750.json
+++ b/tests/integration/common/recordings/1098240ef53bbd378adf8dafbd5838b16eef7d6a7d6e75d24e3c120e25e73750.json
@ -0,0 +1,57 @@
+{
+  "test_id": null,
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Test trace openai 1"
+        }
+      ],
+      "stream": false
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-1098240ef53b",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "I'm happy to help you test the OpenAI API, but I need to clarify that I'm a large language model, not a direct interface to the OpenAI API. However, I can guide you through a simple testing process.\n\nOpenAI provides an API key that allows access to their models, including the powerful \"text-001\" model you mentioned. Here's how you can test it:\n\n**Step 1: Get your API Key**\n\nIf you haven't already, sign up for an OpenAI account at [openai.com](http://openai.com) and obtain a free API key.\n\n**Step 2: Access the API endpoint**\n\nOpenAI provides a few different endpoints for their models. For testing purposes, we'll use the \"API endpoint\" which is `https://api.openai.com/v1/models/text-001`. You can access this URL directly in your web browser or use a tool like `curl` to send an HTTP request.\n\n**Step 3: Test with a simple prompt**\n\nTo test the model, you can use a simple prompt, such as:\n\n`\"Write a short paragraph about AI.\"`\n\nYou can modify this prompt to suit your testing needs. You can also try using more complex prompts or even generating text based on user input.\n\nHere's an example of how you could send a request to the OpenAI endpoint using `curl`:\n```bash\ncurl -X POST \\\n  https://api.openai.com/v1/models/text-001 \\\n  -H 'Authorization: Bearer YOUR_API_KEY' \\\n  -H 'Content-Type: application/json' \\\n  -d '{\"prompt\": \"Write a short paragraph about AI.\"}'\n```\nReplace `YOUR_API_KEY` with your actual OpenAI API key.\n\n**Step 4: Verify the response**\n\nAfter sending the request, you should receive a JSON response from OpenAI. The response will contain the generated text based on your prompt.\n\nIf everything goes well, you should see some text in response to your prompt. This might look something like this:\n```json\n{\n  \"result\": [\n    {\n      \"text\": \"Artificial intelligence has been able to learn and adapt quickly, allowing it to take over various tasks automatically.\"\n    }\n  ]\n}\n```\nThis is just a simple example to demonstrate the process of testing the OpenAI API. Keep in mind that this is just a brief illustration and the actual experience may vary depending on your specific use case.\n\nDo you want me to try using python - gpt-3 via Hugging Face to run some tests ? I can give it a shot if you're not up to the task yourself!",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 0,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 539,
+          "prompt_tokens": 31,
+          "total_tokens": 570,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/common/recordings/395c30078677826058a0cbe136dfd07c816854cfb7015ee4ece0e414d16e7e52.json
+++ b/tests/integration/common/recordings/395c30078677826058a0cbe136dfd07c816854cfb7015ee4ece0e414d16e7e52.json
@ -0,0 +1,57 @@
+{
+  "test_id": null,
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Test trace openai 2"
+        }
+      ],
+      "stream": false
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-395c30078677",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "I'd be happy to help you test the OpenAI 2.0 model!\n\nPlease note that I need to clarify a few things:\n\n1. **I can only simulate the conversation**: I won't have direct access to the actual OpenAI 2.0 model.\n2. **The simulation might not be perfect**: The testing process will be based on my understanding of the model's capabilities and limitations.\n\nThat being said, here's how you can test the OpenAI 2.0 model with me:\n\n**Testing Scenarios**\n\nChoose one or more of the following scenarios to test the OpenAI 2.0 model:\n\n1. **Basic Conversation**: Test basic conversations, such as asking questions and receiving answers.\n2. **Dialogue Flow**: Test the model's ability to maintain a conversation flow by asking follow-up questions.\n3. **Creative Writing**: Test the model's creativity by asking it to generate paragraphs or short stories.\n4. **Fact Retrieval**: Test the model's knowledge retrieval capabilities by asking it factual questions.\n\n**Input Format**\n\nPlease format your input using plain text. You can use:\n\n* A single question or statement\n* Multiple statements separated by commas\n* A prompt with a specific format (e.g., \"Write a short story about [topic]\")\n\n**Example Input**\n```\nAsk me: What is the capital of France?\n```\n\nGo ahead and choose your testing scenario, and I'll simulate the conversation!",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 0,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 293,
+          "prompt_tokens": 31,
+          "total_tokens": 324,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/common/recordings/41c28019c2c89e5962ae3043dc7015ee45aa5ee235645768a230a5fa5cd45ad9.json
+++ b/tests/integration/common/recordings/41c28019c2c89e5962ae3043dc7015ee45aa5ee235645768a230a5fa5cd45ad9.json
--- a/tests/integration/common/recordings/4df315784095a200b2d275c6f8dda8be845e250000208127d20cf8c4f0bc666c.json
+++ b/tests/integration/common/recordings/4df315784095a200b2d275c6f8dda8be845e250000208127d20cf8c4f0bc666c.json
--- a/tests/integration/common/recordings/5b03940f8f14616ba20bf3b695138b785ffc26aed814ef01db492f4a5674d6c5.json
+++ b/tests/integration/common/recordings/5b03940f8f14616ba20bf3b695138b785ffc26aed814ef01db492f4a5674d6c5.json
--- a/tests/integration/common/recordings/64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57.json
+++ b/tests/integration/common/recordings/64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57.json
@ -1,4 +1,5 @@
 {
+  "test_id": null,
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/models",
--- a/tests/integration/common/recordings/b2c646582d0a4d9d8986789261c0d630d5b604ee6291cf8aa3d44ab761f2c676.json
+++ b/tests/integration/common/recordings/b2c646582d0a4d9d8986789261c0d630d5b604ee6291cf8aa3d44ab761f2c676.json
--- a/tests/integration/common/recordings/bfc8818f4ad237ba6c9649d47eaff8946e334ea6a2bcb564d74f4f14dbc3497b.json
+++ b/tests/integration/common/recordings/bfc8818f4ad237ba6c9649d47eaff8946e334ea6a2bcb564d74f4f14dbc3497b.json
@ -0,0 +1,56 @@
+{
+  "test_id": null,
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Test trace 1"
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-bfc8818f4ad2",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "It seems like you'd like to test something, but I'm not sure what. Could you please provide more context or clarify what you're trying to test? I'll do my best to assist you!",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 0,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 42,
+          "prompt_tokens": 29,
+          "total_tokens": 71,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/common/recordings/c34cccb2af2fb9f02f7136b0dd350e75e7d2a77d222ef26a9bc419e10fa33c56.json
+++ b/tests/integration/common/recordings/c34cccb2af2fb9f02f7136b0dd350e75e7d2a77d222ef26a9bc419e10fa33c56.json
@ -0,0 +1,57 @@
+{
+  "test_id": null,
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Test trace openai 0"
+        }
+      ],
+      "stream": false
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-c34cccb2af2f",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "I'm not capable of testing or interacting with the OpenAI API directly. However, I can provide some general information about the OpenAI model called \"Trace\" and how it can be used.\n\nThe Trace was a transformer-based language model developed by OpenAI in 2022. It was designed to generate text based on a given prompt, but it never gained widespread use.\n\nUnfortunately, the model is no longer available for public testing because OpenAI removed it from their model hub after some issues were raised about its quality and limitations.\n\nThat being said, there are various other models that you might be interested in using as an alternative to the Trace. Here's a list of popular models:\n\n1. **Text-Transformer**: This model is designed for text classification and generation tasks.\n2, **DALL-E 2**: A text-to-image model capable of generating images based on user-provided input prompts.\n3:**Diffusers**: An AI model that can generate raw pixel data in the form of an image.\n\nTo test these models or others available through OpenAI models hub you may need to complete a sign-up process.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 0,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 225,
+          "prompt_tokens": 31,
+          "total_tokens": 256,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/common/recordings/c4f314b202711805808eb75f1947cb6cca0bf8dbffb0dfabb814f9da0083b3c3.json
+++ b/tests/integration/common/recordings/c4f314b202711805808eb75f1947cb6cca0bf8dbffb0dfabb814f9da0083b3c3.json
--- a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-52e8575f.json
+++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-52e8575f.json
@ -1,4 +1,5 @@
 {
+  "test_id": null,
  "request": {
    "method": "POST",
    "url": "https://api.openai.com/v1/v1/models",
--- a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-e660ee4a.json
+++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-e660ee4a.json
@ -1,4 +1,5 @@
 {
+  "test_id": null,
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/models",
@ -9,47 +10,11 @@
  },
  "response": {
    "body": [
-      {
-        "__type__": "openai.types.model.Model",
-        "__data__": {
-          "id": "qwen3:8b",
-          "created": 1758707188,
-          "object": "model",
-          "owned_by": "library"
-        }
-      },
-      {
-        "__type__": "openai.types.model.Model",
-        "__data__": {
-          "id": "nomic-embed-text:137m-v1.5-fp16",
-          "created": 1758640855,
-          "object": "model",
-          "owned_by": "library"
-        }
-      },
      {
        "__type__": "openai.types.model.Model",
        "__data__": {
          "id": "nomic-embed-text:latest",
-          "created": 1756727155,
-          "object": "model",
-          "owned_by": "library"
-        }
-      },
-      {
-        "__type__": "openai.types.model.Model",
-        "__data__": {
-          "id": "llama3.2-vision:11b",
-          "created": 1756722893,
-          "object": "model",
-          "owned_by": "library"
-        }
-      },
-      {
-        "__type__": "openai.types.model.Model",
-        "__data__": {
-          "id": "llama-guard3:1b",
-          "created": 1756671473,
+          "created": 1756922046,
          "object": "model",
          "owned_by": "library"
        }
@ -58,7 +23,43 @@
        "__type__": "openai.types.model.Model",
        "__data__": {
          "id": "all-minilm:l6-v2",
-          "created": 1756655274,
+          "created": 1756919946,
+          "object": "model",
+          "owned_by": "library"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "llama3.2-vision:11b",
+          "created": 1753926302,
+          "object": "model",
+          "owned_by": "library"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "llama3.2-vision:latest",
+          "created": 1753845527,
+          "object": "model",
+          "owned_by": "library"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "llama-guard3:1b",
+          "created": 1753479584,
+          "object": "model",
+          "owned_by": "library"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "llama3.2:1b",
+          "created": 1752814944,
          "object": "model",
          "owned_by": "library"
        }
@ -67,16 +68,7 @@
        "__type__": "openai.types.model.Model",
        "__data__": {
          "id": "all-minilm:latest",
-          "created": 1747317111,
-          "object": "model",
-          "owned_by": "library"
-        }
-      },
-      {
-        "__type__": "openai.types.model.Model",
-        "__data__": {
-          "id": "llama3.2:3b-instruct-fp16",
-          "created": 1744974677,
+          "created": 1748994610,
          "object": "model",
          "owned_by": "library"
        }
@ -85,7 +77,16 @@
        "__type__": "openai.types.model.Model",
        "__data__": {
          "id": "llama3.2:3b",
-          "created": 1743536220,
+          "created": 1746123323,
+          "object": "model",
+          "owned_by": "library"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "llama3.2:3b-instruct-fp16",
+          "created": 1746052428,
          "object": "model",
          "owned_by": "library"
        }
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@ -36,6 +36,24 @@ def pytest_sessionstart(session):
        os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay"


+@pytest.fixture(autouse=True)
+def _track_test_context(request):
+    """Automatically track current test context for isolated recordings.
+
+    This fixture runs for every test and stores the test's nodeid in a contextvar
+    that the recording system can access to determine which subdirectory to use.
+    """
+    from llama_stack.testing.inference_recorder import _test_context
+
+    # Store the test nodeid (e.g., "tests/integration/responses/test_basic.py::test_foo[params]")
+    token = _test_context.set(request.node.nodeid)
+
+    yield
+
+    # Cleanup
+    _test_context.reset(token)
+
+
 def pytest_runtest_teardown(item):
    # Check if the test actually ran and passed or failed, but was not skipped or an expected failure (xfail)
    outcome = getattr(item, "execution_outcome", None)
@ -137,8 +155,8 @@ def pytest_addoption(parser):

    parser.addoption(
        "--inference-mode",
-        help="Inference mode: { record, replay, live } (default: replay)",
-        choices=["record", "replay", "live"],
+        help="Inference mode: { record, replay, live, record-if-missing } (default: replay)",
+        choices=["record", "replay", "live", "record-if-missing"],
        default="replay",
    )
    parser.addoption(
--- a/tests/integration/eval/recordings/171c4dcb3dc848196f5d7fd87efd4626e70673c405ae1cd72b8dd0617104263e.json
+++ b/tests/integration/eval/recordings/171c4dcb3dc848196f5d7fd87efd4626e70673c405ae1cd72b8dd0617104263e.json
--- a/tests/integration/eval/recordings/1b2720589d2a4273b5eb2c06b50ab45674040195c15013c9ea43bc6331e1a831.json
+++ b/tests/integration/eval/recordings/1b2720589d2a4273b5eb2c06b50ab45674040195c15013c9ea43bc6331e1a831.json
@ -0,0 +1,57 @@
+{
+  "test_id": "tests/integration/eval/test_eval.py::test_evaluate_benchmark[txt=ollama/llama3.2:3b-instruct-fp16-basic::subset_of]",
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What is the currency of Japan?"
+        }
+      ],
+      "max_tokens": 0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-1b2720589d2a",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "The official currency of Japan is the yen (JPY). It is denoted by the ISO code \"JPY\" and subdivided into 100 sen, although sen are no longer used in everyday transactions. The Japanese government currently plans to phase out cash payments for certain types of transactions and aims to move fully away from paper money by year-end 2031.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 0,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 74,
+          "prompt_tokens": 32,
+          "total_tokens": 106,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/eval/recordings/3e5ea35cb3dc92835d230456b6e2fc61593f964148d6c05df5c4a387a5389e6b.json
+++ b/tests/integration/eval/recordings/3e5ea35cb3dc92835d230456b6e2fc61593f964148d6c05df5c4a387a5389e6b.json
@ -0,0 +1,57 @@
+{
+  "test_id": "tests/integration/eval/test_eval.py::test_evaluate_benchmark[txt=ollama/llama3.2:3b-instruct-fp16-basic::subset_of]",
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What is the smallest country in the world?"
+        }
+      ],
+      "max_tokens": 0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-3e5ea35cb3dc",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "The smallest country in the world is officially Vatican City, with an area of approximately 0.44 km\u00b2 (0.17 sq mi). It is located within Rome, Italy, and is home to the Pope and the central government of the Catholic Church.\n\nVatican City has a population of around 800 people, making it the smallest internationally recognized sovereign state in the world, both by area and population. Despite its small size, Vatican City has its own government, currency, postal system, and even its own police force.\n\nInterestingly, Vatican City is also the headquarters of the Catholic Church and is home to numerous iconic landmarks, including St. Peter's Basilica, the Sistine Chapel, and the Vatican Museums.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 0,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 146,
+          "prompt_tokens": 34,
+          "total_tokens": 180,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/eval/recordings/6de6d1ebc3128dfaba1efe654ca1453f12cd31ce2e294d20868c0c498b7d9136.json
+++ b/tests/integration/eval/recordings/6de6d1ebc3128dfaba1efe654ca1453f12cd31ce2e294d20868c0c498b7d9136.json
@ -0,0 +1,57 @@
+{
+  "test_id": "tests/integration/eval/test_eval.py::test_evaluate_benchmark[txt=ollama/llama3.2:3b-instruct-fp16-basic::subset_of]",
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What is the capital of France?"
+        }
+      ],
+      "max_tokens": 0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-6de6d1ebc312",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "The capital of France is Paris.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 0,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 8,
+          "prompt_tokens": 32,
+          "total_tokens": 40,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/eval/recordings/9ebe1e04fc3a8d41f88992428a7c99669c7e19b3d551090eb6bec83b33de2a18.json
+++ b/tests/integration/eval/recordings/9ebe1e04fc3a8d41f88992428a7c99669c7e19b3d551090eb6bec83b33de2a18.json
--- a/tests/integration/eval/recordings/aa20023c358a0dc718355082cc244a231426700a772b8dc64abf05d8b126a736.json
+++ b/tests/integration/eval/recordings/aa20023c358a0dc718355082cc244a231426700a772b8dc64abf05d8b126a736.json
--- a/tests/integration/eval/recordings/b52a054b314c8b42634c4a9ef76280591f73cf26c00b7308dde7d19a1ced016c.json
+++ b/tests/integration/eval/recordings/b52a054b314c8b42634c4a9ef76280591f73cf26c00b7308dde7d19a1ced016c.json
--- a/tests/integration/eval/recordings/c07b01fe99467efcfa99f6ac9c60acc212cf2ac3bdd4192aabb5f98359236572.json
+++ b/tests/integration/eval/recordings/c07b01fe99467efcfa99f6ac9c60acc212cf2ac3bdd4192aabb5f98359236572.json
--- a/tests/integration/files/recordings/108e7edbe4a967fdb8befc0df3d3c10c336afca41aad421e653e4b02eab2a632.json
+++ b/tests/integration/files/recordings/108e7edbe4a967fdb8befc0df3d3c10c336afca41aad421e653e4b02eab2a632.json
@ -0,0 +1,59 @@
+{
+  "test_id": "tests/integration/files/test_files.py::test_files_authentication_shared_attributes",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T21:21:14.509335-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T21:20:56.787737-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/files/recordings/12a8beefd95278334e190cfbc8dba722684325cb294aaf45dd42f316d9f6ae5e.json
+++ b/tests/integration/files/recordings/12a8beefd95278334e190cfbc8dba722684325cb294aaf45dd42f316d9f6ae5e.json
@ -0,0 +1,59 @@
+{
+  "test_id": "tests/integration/files/test_files.py::test_files_authentication_isolation",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T21:21:14.509335-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T21:20:56.787737-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/files/recordings/1ed03eb1155d3be1507472aca14a1ae10513ccc6b93e7e9d9d50f4cf83b8276b.json
+++ b/tests/integration/files/recordings/1ed03eb1155d3be1507472aca14a1ae10513ccc6b93e7e9d9d50f4cf83b8276b.json
@ -0,0 +1,59 @@
+{
+  "test_id": "tests/integration/files/test_files.py::test_files_authentication_anonymous_access",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T21:21:14.509335-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T21:20:56.787737-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/0483665572c6b72adda44aa0b31c105c4a5ac4da26d514c61fbd319910e1ce05.json
+++ b/tests/integration/inference/recordings/0483665572c6b72adda44aa0b31c105c4a5ac4da26d514c61fbd319910e1ce05.json
@ -1,4 +1,5 @@
 {
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
--- a/tests/integration/inference/recordings/12ea4e4a2705b0589c186d9d068bac0afa6d2385867afa554c6c672722a4e1c0.json
+++ b/tests/integration/inference/recordings/12ea4e4a2705b0589c186d9d068bac0afa6d2385867afa554c6c672722a4e1c0.json
--- a/tests/integration/inference/recordings/166dbff8b991d40d060f9307fc2a67fc24e29dc542fdce9611756d28403e4e86.json
+++ b/tests/integration/inference/recordings/166dbff8b991d40d060f9307fc2a67fc24e29dc542fdce9611756d28403e4e86.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.143331-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:06:54.261283-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/1774e408dada8623a932f20fd67ab722cbff2a2213309d8a33ec2ed5a444e5d4.json
+++ b/tests/integration/inference/recordings/1774e408dada8623a932f20fd67ab722cbff2a2213309d8a33ec2ed5a444e5d4.json
@ -1,4 +1,5 @@
 {
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
--- a/tests/integration/inference/recordings/1f3149657717e1cca83f08af67b9f34993a49de31349b11ce13c5d0465ca5f6a.json
+++ b/tests/integration/inference/recordings/1f3149657717e1cca83f08af67b9f34993a49de31349b11ce13c5d0465ca5f6a.json
@ -1,4 +1,5 @@
 {
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
--- a/tests/integration/inference/recordings/20032ec4aeb47f2a2711ec6fcada258dd47fcaefd22f8ee6a6c0b07152c551d4.json
+++ b/tests/integration/inference/recordings/20032ec4aeb47f2a2711ec6fcada258dd47fcaefd22f8ee6a6c0b07152c551d4.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:streaming_01]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:07:01.911896-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.533878-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/228c053a646f7563791aa7f633bb7353eb3f763add416a69ddb255b16f6ea0a9.json
+++ b/tests/integration/inference/recordings/228c053a646f7563791aa7f633bb7353eb3f763add416a69ddb255b16f6ea0a9.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-True]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:07:02.652559-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.533878-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/2825486bdbce4722bbc4a993f9f39e820c16bc8d324dc598be1224dad079ebb7.json
+++ b/tests/integration/inference/recordings/2825486bdbce4722bbc4a993f9f39e820c16bc8d324dc598be1224dad079ebb7.json
--- a/tests/integration/inference/recordings/28b529023374d3345f46c0b183ee352ff5014ff344238bd40b820c61510097eb.json
+++ b/tests/integration/inference/recordings/28b529023374d3345f46c0b183ee352ff5014ff344238bd40b820c61510097eb.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:non_streaming_01]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:06:59.383555-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.533878-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/28b74a89cdc85de62395836bb21baa777d3197245698f4a7531afa0a15c11d1c.json
+++ b/tests/integration/inference/recordings/28b74a89cdc85de62395836bb21baa777d3197245698f4a7531afa0a15c11d1c.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:non_streaming_02]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:07:04.627464-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.533878-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/36968c3ac66be305df6bdac7b747612e4e6645cf3bfcec16f96a41397cbfd5aa.json
+++ b/tests/integration/inference/recordings/36968c3ac66be305df6bdac7b747612e4e6645cf3bfcec16f96a41397cbfd5aa.json
--- a/tests/integration/inference/recordings/37e10e6ac4dccb3a91333fcfe9a17806950b21da9470c98960495f3b9fe94ecd.json
+++ b/tests/integration/inference/recordings/37e10e6ac4dccb3a91333fcfe9a17806950b21da9470c98960495f3b9fe94ecd.json
--- a/tests/integration/inference/recordings/3d2d95c9e7f6a2e434c69fb2d5ff50a3cce15ef5f308d5443beebd71a7dc7818.json
+++ b/tests/integration/inference/recordings/3d2d95c9e7f6a2e434c69fb2d5ff50a3cce15ef5f308d5443beebd71a7dc7818.json
--- a/tests/integration/inference/recordings/4b67443597195a9092b2b27b1dff526263df8ab267f4a9df7196dcb6c11aef17.json
+++ b/tests/integration/inference/recordings/4b67443597195a9092b2b27b1dff526263df8ab267f4a9df7196dcb6c11aef17.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-True]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:07:03.361039-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.533878-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/53f557a067b8f00b81dee1d0f8def8051f330d7a91faa68addc39f9b29e5dbbd.json
+++ b/tests/integration/inference/recordings/53f557a067b8f00b81dee1d0f8def8051f330d7a91faa68addc39f9b29e5dbbd.json
--- a/tests/integration/inference/recordings/54606aca0c734fe36c3fcba632a36aa4582c2c92af953638d6e1ae060c01d71e.json
+++ b/tests/integration/inference/recordings/54606aca0c734fe36c3fcba632a36aa4582c2c92af953638d6e1ae060c01d71e.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-False]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:07:08.369100-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.533878-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/640984c9321e94cb3c6d24ab4af1e83e628af419d09d77948f2480651fa472f2.json
+++ b/tests/integration/inference/recordings/640984c9321e94cb3c6d24ab4af1e83e628af419d09d77948f2480651fa472f2.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.330036-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:06:54.261283-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/6626c46dfaf503c589013d6396a3e6871117c7c024d37f16a8d0b2a7def9f8ed.json
+++ b/tests/integration/inference/recordings/6626c46dfaf503c589013d6396a3e6871117c7c024d37f16a8d0b2a7def9f8ed.json
@ -0,0 +1,40 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming_suffix[txt=ollama/llama3.2:3b-instruct-fp16-inference:completion:suffix]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:05:44.851615-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/676c9e0736ad6864480d4a884e157e9a9ceb4e5f9441635b19809ed518542406.json
+++ b/tests/integration/inference/recordings/676c9e0736ad6864480d4a884e157e9a9ceb4e5f9441635b19809ed518542406.json
@ -0,0 +1,40 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_streaming[txt=ollama/llama3.2:3b-instruct-fp16-inference:completion:sanity]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:05:44.851615-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/68a0a14e828ca575c2150f6de6961bf8d326b1ce1beb6902a7aa369b07f7e17a.json
+++ b/tests/integration/inference/recordings/68a0a14e828ca575c2150f6de6961bf8d326b1ce1beb6902a7aa369b07f7e17a.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:streaming_01]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:07:02.652559-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.533878-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/6ea16771b09b4966fa1dfb0349a83f6e11a6716cf3bd9d6e711ed7497232c152.json
+++ b/tests/integration/inference/recordings/6ea16771b09b4966fa1dfb0349a83f6e11a6716cf3bd9d6e711ed7497232c152.json
@ -1,4 +1,5 @@
 {
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
--- a/tests/integration/inference/recordings/7794d17cdf3672c5508ce0b07b0622513134f71115f13dd5e65ddae9f2b54862.json
+++ b/tests/integration/inference/recordings/7794d17cdf3672c5508ce0b07b0622513134f71115f13dd5e65ddae9f2b54862.json
--- a/tests/integration/inference/recordings/78aa717a1d0a499a1cc065e95896b63af46cc13f5a6da2346502a16d55aab891.json
+++ b/tests/integration/inference/recordings/78aa717a1d0a499a1cc065e95896b63af46cc13f5a6da2346502a16d55aab891.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.533878-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:06:54.261283-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/7b870ce8970377661fbc70050c35a7325434b30be994cd3ade1ad36aabc027e9.json
+++ b/tests/integration/inference/recordings/7b870ce8970377661fbc70050c35a7325434b30be994cd3ade1ad36aabc027e9.json
--- a/tests/integration/inference/recordings/803b63dc14be19794ed3eda644b58c1ddfee8dbd1c3f7e18b21ada4564d46f35.json
+++ b/tests/integration/inference/recordings/803b63dc14be19794ed3eda644b58c1ddfee8dbd1c3f7e18b21ada4564d46f35.json
@ -0,0 +1,40 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=ollama/llama3.2:3b-instruct-fp16]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:05:47.301578-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/822c192e060366dd978158aebff1c2dcdb9ce6c923a245db07314151e9d397c2.json
+++ b/tests/integration/inference/recordings/822c192e060366dd978158aebff1c2dcdb9ce6c923a245db07314151e9d397c2.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.330036-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:06:54.261283-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/834be37d56251727f6bdcafeafbc3846670d41955cc46a693767cbc67bed9ae2.json
+++ b/tests/integration/inference/recordings/834be37d56251727f6bdcafeafbc3846670d41955cc46a693767cbc67bed9ae2.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.077957-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:06:54.261283-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/8823085c58bdb7dca853d7a661e51d80446d906528ee8f715d57f748fd533afc.json
+++ b/tests/integration/inference/recordings/8823085c58bdb7dca853d7a661e51d80446d906528ee8f715d57f748fd533afc.json
--- a/tests/integration/inference/recordings/9186cbbe565a752223cd841b490891989401b4f24e4a8dc6bb7a5a40a126a45e.json
+++ b/tests/integration/inference/recordings/9186cbbe565a752223cd841b490891989401b4f24e4a8dc6bb7a5a40a126a45e.json
@ -0,0 +1,78 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/ps",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/ps",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ProcessResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-10-03T20:06:55.533878-07:00",
+            "size": 585846784,
+            "size_vram": 585846784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            },
+            "context_length": 256
+          },
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "name": "llama3.2:3b-instruct-fp16",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "expires_at": "2025-10-03T20:06:54.261283-07:00",
+            "size": 7919570944,
+            "size_vram": 7919570944,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            },
+            "context_length": 4096
+          },
+          {
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-10-03T20:06:23.356289-07:00",
+            "size": 2350966784,
+            "size_vram": 2350966784,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            },
+            "context_length": 4096
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/Show more
+++ b/Show more