Merge remote-tracking branch 'origin/main' into stack-config-default-embed

Ashwin Bharambe 2025-10-20 13:29:19 -07:00
commit 31249a1a75
237 changed files with 30895 additions and 15441 deletions

View file

@@ -7,6 +7,24 @@ providers:
  - provider_id: kaze
    provider_type: remote::kaze
    config: {}
storage:
  backends:
    kv_default:
      type: kv_sqlite
      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/external}/kvstore.db
    sql_default:
      type: sql_sqlite
      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/external}/sql_store.db
  stores:
    metadata:
      namespace: registry
      backend: kv_default
    inference:
      table_name: inference_store
      backend: sql_default
    conversations:
      table_name: openai_conversations
      backend: sql_default
external_apis_dir: ~/.llama/apis.d
external_providers_dir: ~/.llama/providers.d
server:
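
The `db_path` values in this config use the stack's `${env.VAR:=default}` placeholder syntax. A minimal sketch of that expansion, assuming the placeholder resolves to the environment variable when set and to the literal default otherwise (the real llama-stack resolver may handle more cases):

```python
import os
import re

# Expand `${env.NAME:=default}` placeholders: use the environment value
# when NAME is set, otherwise fall back to the inline default.
_ENV_PATTERN = re.compile(r"\$\{env\.(?P<name>\w+):=(?P<default>[^}]*)\}")

def expand_env_defaults(value: str) -> str:
    return _ENV_PATTERN.sub(
        lambda m: os.environ.get(m.group("name"), m.group("default")),
        value,
    )

# With SQLITE_STORE_DIR unset:
# expand_env_defaults("${env.SQLITE_STORE_DIR:=~/.llama/distributions/external}/kvstore.db")
# -> "~/.llama/distributions/external/kvstore.db"
```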

View file

@@ -548,5 +548,6 @@
}
],
"is_streaming": true
}
},
"id_normalization_mapping": {}
}
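
A recurring change across these fixtures is the new top-level `"id_normalization_mapping"` field, empty here. Presumably it records how nondeterministic identifiers (e.g. tool-call IDs) were normalized so a replay can restore them; the sketch below is a hypothetical reading of that intent, not the recorder's actual logic:

```python
import json

# Hypothetical application of an id_normalization_mapping during replay:
# substitute each normalized placeholder ID in the serialized response
# with the original recorded value. With an empty mapping, as in these
# fixtures, the recording is returned unchanged.
def denormalize_ids(recording: dict) -> dict:
    mapping = recording.get("id_normalization_mapping", {})
    text = json.dumps(recording["response"])
    for normalized, original in mapping.items():
        text = text.replace(normalized, original)
    return json.loads(text)
```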

View file

@@ -0,0 +1,447 @@
{
"test_id": "tests/integration/agents/test_agents.py::test_tool_choice_get_boiling_point[ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant"
},
{
"role": "user",
"content": "What is the boiling point of the liquid polyjuice in celsius?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_5qverjg6",
"type": "function",
"function": {
"name": "get_boiling_point",
"arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_5qverjg6",
"content": "-100"
}
],
"max_tokens": 512,
"stream": true,
"temperature": 0.0001,
"tool_choice": {
"type": "function",
"function": {
"name": "get_boiling_point"
}
},
"tools": [
{
"type": "function",
"function": {
"name": "get_boiling_point",
"description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.",
"parameters": {
"type": "object",
"properties": {
"liquid_name": {
"type": "string",
"description": "The name of the liquid"
},
"celcius": {
"type": "boolean",
"description": "Whether to return the boiling point in Celcius"
}
},
"required": [
"liquid_name"
]
}
}
}
],
"top_p": 0.9
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": " boiling",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": " point",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": " liquid",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": " poly",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": "ju",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": "ice",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": " -",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": "100",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": "\u00b0C",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0940d1521204",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
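
The streamed reply in this recording arrives as one `chat.completion.chunk` per token fragment; concatenating each `delta.content` reconstructs the full answer. A minimal sketch, assuming `body` is the recording's `response.body` list as shown above:

```python
# Reassemble the assistant's reply from a recorded stream: each entry's
# `__data__` is a chunk whose `delta.content` holds a text fragment, and
# the stream ends when a choice reports finish_reason == "stop".
def reassemble_content(body: list[dict]) -> str:
    parts = []
    for entry in body:
        chunk = entry["__data__"]
        for choice in chunk["choices"]:
            fragment = choice["delta"].get("content")
            if fragment:
                parts.append(fragment)
    return "".join(parts)

# For this recording:
# reassemble_content(body) == "The boiling point of liquid polyjuice is -100°C."
```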

View file

@@ -0,0 +1,888 @@
{
"test_id": "tests/integration/agents/test_openai_responses.py::test_response_with_instructions[txt=ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant and speak in pirate language."
},
{
"role": "user",
"content": "What is the capital of France?"
},
{
"role": "assistant",
"content": "The capital of France is Paris."
}
],
"stream": true,
"stream_options": {
"include_usage": true
}
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " Yer",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " look",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": "in",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": "'",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " fer",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " a",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " port",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " o",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": "'",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " call",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": ",",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " eh",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": "?",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " That",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " be",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " one",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": "!",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " Yer",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " won",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": "'t",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " go",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " astr",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": "ay",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " with",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " that",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " answer",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": ",",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": " mate",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": "y",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-0f5443c07d15",
"choices": [],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 32,
"prompt_tokens": 50,
"total_tokens": 82,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
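
Because this request sets `"stream_options": {"include_usage": true}`, the stream is terminated by an extra chunk whose `choices` list is empty and whose `usage` field carries the aggregate token counts (here 50 prompt + 32 completion = 82 total). A small sketch, under the same `body` assumption as above:

```python
# Pull the token counts from the trailing usage chunk: it is the one
# chunk with an empty `choices` list and a non-null `usage` object.
def extract_usage(body: list[dict]) -> dict | None:
    for entry in body:
        chunk = entry["__data__"]
        if not chunk["choices"] and chunk.get("usage"):
            return chunk["usage"]
    return None

# For this recording:
# {'completion_tokens': 32, 'prompt_tokens': 50, 'total_tokens': 82, ...}
```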

View file

@@ -54,5 +54,6 @@
}
},
"is_streaming": false
}
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,256 @@
{
"test_id": "tests/integration/agents/test_openai_responses.py::test_response_with_instructions[txt=ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "What is the capital of France?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
}
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15b23045b5cd",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15b23045b5cd",
"choices": [
{
"delta": {
"content": " capital",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15b23045b5cd",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15b23045b5cd",
"choices": [
{
"delta": {
"content": " France",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15b23045b5cd",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15b23045b5cd",
"choices": [
{
"delta": {
"content": " Paris",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15b23045b5cd",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15b23045b5cd",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15b23045b5cd",
"choices": [],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 8,
"prompt_tokens": 32,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@@ -54,5 +54,6 @@
}
},
"is_streaming": false
}
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,442 @@
{
"test_id": "tests/integration/agents/test_agents.py::test_create_turn_response[ollama/llama3.2:3b-instruct-fp16-client_tools1]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant"
},
{
"role": "user",
"content": "Call get_boiling_point_with_metadata tool and answer What is the boiling point of polyjuice?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_klhbln13",
"type": "function",
"function": {
"name": "get_boiling_point_with_metadata",
"arguments": "{\"celcius\":false,\"liquid_name\":\"polyjuice\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_klhbln13",
"content": "-212"
}
],
"max_tokens": 512,
"stream": true,
"temperature": 0.0001,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_boiling_point_with_metadata",
"description": "Returns the boiling point of a liquid in Celcius or Fahrenheit",
"parameters": {
"type": "object",
"properties": {
"liquid_name": {
"type": "string",
"description": "The name of the liquid"
},
"celcius": {
"type": "boolean",
"description": "Whether to return the boiling point in Celcius"
}
},
"required": [
"liquid_name"
]
}
}
}
],
"top_p": 0.9
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": " boiling",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": " point",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": " poly",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": "ju",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": "ice",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": " -",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": "212",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": " degrees",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": " Celsius",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1f0aef747544",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
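
This recording captures a full tool-call round trip: the assistant's `tool_calls` turn is echoed back, followed by a `tool` message whose `tool_call_id` matches, and the model then produces the final text answer. A sketch of issuing the same request with the `openai` client (the `base_url` and `api_key` are assumptions, since Ollama ignores the key; everything else mirrors the recorded body):

```python
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="unused")

tools = [{
    "type": "function",
    "function": {
        "name": "get_boiling_point_with_metadata",
        "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit",
        "parameters": {
            "type": "object",
            "properties": {
                "liquid_name": {"type": "string", "description": "The name of the liquid"},
                "celcius": {"type": "boolean", "description": "Whether to return the boiling point in Celcius"},
            },
            "required": ["liquid_name"],
        },
    },
}]

messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Call get_boiling_point_with_metadata tool and answer What is the boiling point of polyjuice?"},
    # The assistant's earlier tool call is echoed back, and the tool's
    # result follows with a matching tool_call_id.
    {"role": "assistant", "content": "", "tool_calls": [{
        "id": "call_klhbln13",
        "type": "function",
        "function": {
            "name": "get_boiling_point_with_metadata",
            "arguments": "{\"celcius\":false,\"liquid_name\":\"polyjuice\"}",
        },
    }]},
    {"role": "tool", "tool_call_id": "call_klhbln13", "content": "-212"},
]

stream = client.chat.completions.create(
    model="llama3.2:3b-instruct-fp16",
    messages=messages,
    tools=tools,
    tool_choice="auto",
    stream=True,
    max_tokens=512,
    temperature=0.0001,
    top_p=0.9,
)
for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
```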

View file

@@ -56,7 +56,7 @@
"tool_calls": [
{
"index": 0,
"id": "call_os3xa9go",
"id": "call_6nqo069h",
"function": {
"arguments": "{\"city\":\"Tokyo\"}",
"name": "get_weather"
@@ -115,9 +115,9 @@
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 15,
"completion_tokens": 18,
"prompt_tokens": 179,
"total_tokens": 194,
"total_tokens": 197,
"completion_tokens_details": null,
"prompt_tokens_details": null
}

View file

@@ -0,0 +1,416 @@
{
"test_id": "tests/integration/agents/test_agents.py::test_custom_tool_infinite_loop[ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant Always respond with tool calls no matter what. "
},
{
"role": "user",
"content": "Get the boiling point of polyjuice with a tool call."
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_9x4z21g1",
"type": "function",
"function": {
"name": "get_boiling_point",
"arguments": "{\"celcius\":\"true\",\"liquid_name\":\"polyjuice\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_9x4z21g1",
"content": "-100"
}
],
"max_tokens": 512,
"stream": true,
"temperature": 0.0001,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_boiling_point",
"description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.",
"parameters": {
"type": "object",
"properties": {
"liquid_name": {
"type": "string",
"description": "The name of the liquid"
},
"celcius": {
"type": "boolean",
"description": "Whether to return the boiling point in Celcius"
}
},
"required": [
"liquid_name"
]
}
}
}
],
"top_p": 0.9
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": " boiling",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": " point",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": " Poly",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": "ju",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": "ice",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": " -",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": "100",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": "\u00b0C",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-256d85719096",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@@ -54,5 +54,6 @@
}
},
"is_streaming": false
}
},
"id_normalization_mapping": {}
}

View file

@@ -66,7 +66,7 @@
"tool_calls": [
{
"index": 0,
"id": "call_ixvkq8fh",
"id": "call_icfpgg5q",
"function": {
"arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}",
"name": "get_boiling_point"
@@ -116,5 +116,6 @@
}
],
"is_streaming": true
}
},
"id_normalization_mapping": {}
}

View file

@@ -54,5 +54,6 @@
}
},
"is_streaming": false
}
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,442 @@
{
"test_id": "tests/integration/agents/test_agents.py::test_custom_tool[ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant"
},
{
"role": "user",
"content": "What is the boiling point of the liquid polyjuice in celsius?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_icfpgg5q",
"type": "function",
"function": {
"name": "get_boiling_point",
"arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_icfpgg5q",
"content": "-100"
}
],
"max_tokens": 512,
"stream": true,
"temperature": 0.0001,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_boiling_point",
"description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.",
"parameters": {
"type": "object",
"properties": {
"liquid_name": {
"type": "string",
"description": "The name of the liquid"
},
"celcius": {
"type": "boolean",
"description": "Whether to return the boiling point in Celcius"
}
},
"required": [
"liquid_name"
]
}
}
}
],
"top_p": 0.9
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": " boiling",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": " point",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": " liquid",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": " poly",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": "ju",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": "ice",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": " -",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": "100",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": "\u00b0C",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-45d0aabc5023",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@@ -48,7 +48,7 @@
"tool_calls": [
{
"index": 0,
"id": "call_lqrdy0rt",
"id": "call_x427af31",
"function": {
"arguments": "{}",
"name": "get_current_time"
@@ -107,9 +107,9 @@
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 14,
"completion_tokens": 12,
"prompt_tokens": 161,
"total_tokens": 175,
"total_tokens": 173,
"completion_tokens_details": null,
"prompt_tokens_details": null
}

View file

@@ -56,7 +56,7 @@
"tool_calls": [
{
"index": 0,
"id": "call_4ibtjudr",
"id": "call_wkjhgmpf",
"function": {
"arguments": "{\"city\":\"Tokyo\"}",
"name": "get_weather"

View file

@@ -66,7 +66,7 @@
"tool_calls": [
{
"index": 0,
"id": "call_pojpzwm8",
"id": "call_klhbln13",
"function": {
"arguments": "{\"celcius\":false,\"liquid_name\":\"polyjuice\"}",
"name": "get_boiling_point_with_metadata"
@@ -116,5 +116,6 @@
}
],
"is_streaming": true
}
},
"id_normalization_mapping": {}
}

View file

@@ -54,5 +54,6 @@
}
},
"is_streaming": false
}
},
"id_normalization_mapping": {}
}

View file

@@ -54,5 +54,6 @@
}
},
"is_streaming": false
}
},
"id_normalization_mapping": {}
}

View file

@@ -54,5 +54,6 @@
}
},
"is_streaming": false
}
},
"id_normalization_mapping": {}
}

View file

@@ -66,7 +66,7 @@
"tool_calls": [
{
"index": 0,
"id": "call_rwasjr3y",
"id": "call_zqu5i0ti",
"function": {
"arguments": "{\"celcius\":null,\"liquid_name\":\"polyjuice\"}",
"name": "get_boiling_point"
@@ -116,5 +116,6 @@
}
],
"is_streaming": true
}
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,59 @@
{
"test_id": "tests/integration/agents/test_agents.py::test_tool_choice_required[ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-guard3:1b",
"messages": [
{
"role": "user",
"content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of liquid polyjuice is -100\u00b0C.\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
}
],
"stream": false,
"temperature": 0.0
},
"endpoint": "/v1/chat/completions",
"model": "llama-guard3:1b"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-7e794c73bf79",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "safe",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama-guard3:1b",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 2,
"prompt_tokens": 421,
"total_tokens": 423,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -54,5 +54,6 @@
      }
    },
    "is_streaming": false
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -54,5 +54,6 @@
      }
    },
    "is_streaming": false
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -0,0 +1,442 @@
{
"test_id": "tests/integration/agents/test_agents.py::test_create_turn_response[ollama/llama3.2:3b-instruct-fp16-client_tools0]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant"
},
{
"role": "user",
"content": "Call get_boiling_point tool and answer What is the boiling point of polyjuice?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_zqu5i0ti",
"type": "function",
"function": {
"name": "get_boiling_point",
"arguments": "{\"celcius\":null,\"liquid_name\":\"polyjuice\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_zqu5i0ti",
"content": "-212"
}
],
"max_tokens": 512,
"stream": true,
"temperature": 0.0001,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_boiling_point",
"description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.",
"parameters": {
"type": "object",
"properties": {
"liquid_name": {
"type": "string",
"description": "The name of the liquid"
},
"celcius": {
"type": "boolean",
"description": "Whether to return the boiling point in Celcius"
}
},
"required": [
"liquid_name"
]
}
}
}
],
"top_p": 0.9
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": " boiling",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": " point",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": " poly",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": "ju",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": "ice",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": " -",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": "212",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": " degrees",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": " Celsius",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-8c4ec4715269",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -54,5 +54,6 @@
      }
    },
    "is_streaming": false
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -0,0 +1,59 @@
{
"test_id": "tests/integration/agents/test_agents.py::test_tool_choice_get_boiling_point[ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-guard3:1b",
"messages": [
{
"role": "user",
"content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of liquid polyjuice is -100\u00b0C.\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
}
],
"stream": false,
"temperature": 0.0
},
"endpoint": "/v1/chat/completions",
"model": "llama-guard3:1b"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-8fc418c02b8b",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "safe",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama-guard3:1b",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 2,
"prompt_tokens": 421,
"total_tokens": 423,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -54,5 +54,6 @@
      }
    },
    "is_streaming": false
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -1510,5 +1510,6 @@
      }
    ],
    "is_streaming": true
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -54,5 +54,6 @@
      }
    },
    "is_streaming": false
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -54,5 +54,6 @@
      }
    },
    "is_streaming": false
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -54,5 +54,6 @@
      }
    },
    "is_streaming": false
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -0,0 +1,260 @@
{
"test_id": "tests/integration/agents/test_openai_responses.py::test_response_with_instructions[txt=ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is the capital of France?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
}
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b3c24a0ab429",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b3c24a0ab429",
"choices": [
{
"delta": {
"content": " capital",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b3c24a0ab429",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b3c24a0ab429",
"choices": [
{
"delta": {
"content": " France",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b3c24a0ab429",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b3c24a0ab429",
"choices": [
{
"delta": {
"content": " Paris",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b3c24a0ab429",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b3c24a0ab429",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b3c24a0ab429",
"choices": [],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 8,
"prompt_tokens": 38,
"total_tokens": 46,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,442 @@
{
"test_id": "tests/integration/agents/test_agents.py::test_tool_choice_required[ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant"
},
{
"role": "user",
"content": "What is the boiling point of the liquid polyjuice in celsius?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_z1rt0qb1",
"type": "function",
"function": {
"name": "get_boiling_point",
"arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_z1rt0qb1",
"content": "-100"
}
],
"max_tokens": 512,
"stream": true,
"temperature": 0.0001,
"tool_choice": "required",
"tools": [
{
"type": "function",
"function": {
"name": "get_boiling_point",
"description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.",
"parameters": {
"type": "object",
"properties": {
"liquid_name": {
"type": "string",
"description": "The name of the liquid"
},
"celcius": {
"type": "boolean",
"description": "Whether to return the boiling point in Celcius"
}
},
"required": [
"liquid_name"
]
}
}
}
],
"top_p": 0.9
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": " boiling",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": " point",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": " liquid",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": " poly",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": "ju",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": "ice",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": " -",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": "100",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": "\u00b0C",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b4a47451a2af",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -66,7 +66,7 @@
                "tool_calls": [
                  {
                    "index": 0,
                    "id": "call_qryqpevz",
                    "id": "call_9x4z21g1",
                    "function": {
                      "arguments": "{\"celcius\":\"true\",\"liquid_name\":\"polyjuice\"}",
                      "name": "get_boiling_point"
@ -116,5 +116,6 @@
      }
    ],
    "is_streaming": true
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -54,5 +54,6 @@
      }
    },
    "is_streaming": false
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -54,5 +54,6 @@
      }
    },
    "is_streaming": false
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -54,5 +54,6 @@
      }
    },
    "is_streaming": false
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -0,0 +1,59 @@
{
"test_id": "tests/integration/agents/test_agents.py::test_custom_tool[ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama-guard3:1b",
"messages": [
{
"role": "user",
"content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of liquid polyjuice is -100\u00b0C.\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
}
],
"stream": false,
"temperature": 0.0
},
"endpoint": "/v1/chat/completions",
"model": "llama-guard3:1b"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-da6fc54bb65d",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "safe",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama-guard3:1b",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 2,
"prompt_tokens": 421,
"total_tokens": 423,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -71,7 +71,7 @@
                "tool_calls": [
                  {
                    "index": 0,
                    "id": "call_ur5tbdbt",
                    "id": "call_5qverjg6",
                    "function": {
                      "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}",
                      "name": "get_boiling_point"
@ -121,5 +121,6 @@
      }
    ],
    "is_streaming": true
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -54,5 +54,6 @@
      }
    },
    "is_streaming": false
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -66,7 +66,7 @@
                "tool_calls": [
                  {
                    "index": 0,
                    "id": "call_rq1pcgq7",
                    "id": "call_z1rt0qb1",
                    "function": {
                      "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}",
                      "name": "get_boiling_point"
@ -116,5 +116,6 @@
      }
    ],
    "is_streaming": true
  }
  },
  "id_normalization_mapping": {}
}

View file

@ -466,3 +466,53 @@ def test_guardrails_with_tools(compat_client, text_model_id):
    # Response should be either a function call or a message
    output_type = response.output[0].type
    assert output_type in ["function_call", "message"]


def test_response_with_instructions(openai_client, client_with_models, text_model_id):
    """Test the instructions parameter in the responses object."""
    if isinstance(client_with_models, LlamaStackAsLibraryClient):
        pytest.skip("OpenAI responses are not supported when testing with library client yet.")

    client = openai_client
    messages = [
        {
            "role": "user",
            "content": "What is the capital of France?",
        }
    ]

    # First, create a response without the instructions parameter
    response_w_o_instructions = client.responses.create(
        model=text_model_id,
        input=messages,
        stream=False,
    )
    # Verify the instructions field is None
    assert response_w_o_instructions.instructions is None

    # Next, create a response and pass the instructions parameter
    instructions = "You are a helpful assistant."
    response_with_instructions = client.responses.create(
        model=text_model_id,
        instructions=instructions,
        input=messages,
        stream=False,
    )
    # Verify we have a valid instructions field
    assert response_with_instructions.instructions == instructions

    # Finally, test the instructions parameter with a previous response id
    instructions2 = "You are a helpful assistant and speak in pirate language."
    response_with_instructions2 = client.responses.create(
        model=text_model_id,
        instructions=instructions2,
        input=messages,
        previous_response_id=response_with_instructions.id,
        stream=False,
    )
    # Verify the instructions from the previous response were not carried over to the next response
    assert response_with_instructions2.instructions == instructions2

View file

@ -70,10 +70,15 @@ class BatchHelper:
    ):
        """Wait for a batch to reach a terminal status.

        Uses exponential backoff polling strategy for efficient waiting:
        - Starts with short intervals (0.1s) for fast batches (e.g., replay mode)
        - Doubles interval each iteration up to a maximum
        - Adapts automatically to both fast and slow batch processing

        Args:
            batch_id: The batch ID to monitor
            max_wait_time: Maximum time to wait in seconds (default: 60 seconds)
            sleep_interval: Time to sleep between checks in seconds (default: 1/10th of max_wait_time, min 1s, max 15s)
            sleep_interval: If provided, uses fixed interval instead of exponential backoff
            expected_statuses: Set of expected terminal statuses (default: {"completed"})
            timeout_action: Action on timeout - "fail" (pytest.fail) or "skip" (pytest.skip)
@ -84,10 +89,6 @@
            pytest.Failed: If batch reaches an unexpected status or timeout_action is "fail"
            pytest.Skipped: If timeout_action is "skip" on timeout or unexpected status
        """
        if sleep_interval is None:
            # Default to 1/10th of max_wait_time, with min 1s and max 15s
            sleep_interval = max(1, min(15, max_wait_time // 10))

        if expected_statuses is None:
            expected_statuses = {"completed"}
@ -95,6 +96,15 @@
        unexpected_statuses = terminal_statuses - expected_statuses
        start_time = time.time()

        # Use exponential backoff if no explicit sleep_interval provided
        if sleep_interval is None:
            current_interval = 0.1  # Start with 100ms
            max_interval = 10.0  # Cap at 10 seconds
        else:
            current_interval = sleep_interval
            max_interval = sleep_interval

        while time.time() - start_time < max_wait_time:
            current_batch = self.client.batches.retrieve(batch_id)
@ -107,7 +117,11 @@
            else:
                pytest.fail(error_msg)

            time.sleep(sleep_interval)
            time.sleep(current_interval)
            # Exponential backoff: double the interval each time, up to max
            if sleep_interval is None:
                current_interval = min(current_interval * 2, max_interval)

        timeout_msg = f"Batch did not reach expected status {expected_statuses} within {max_wait_time} seconds"
        if timeout_action == "skip":

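Taken on its own, the polling strategy this diff introduces fits in a short standalone sketch; `retrieve_status` below is a hypothetical stand-in for `self.client.batches.retrieve(batch_id).status`, not part of the actual helper:

import time


def poll_with_backoff(retrieve_status, max_wait_time=60.0, sleep_interval=None):
    """Poll until a terminal status, starting at 100ms and doubling up to 10s."""
    # A fixed interval when given explicitly; exponential backoff otherwise
    interval = 0.1 if sleep_interval is None else sleep_interval
    max_interval = 10.0 if sleep_interval is None else sleep_interval
    start = time.time()
    while time.time() - start < max_wait_time:
        status = retrieve_status()
        if status in {"completed", "failed", "cancelled", "expired"}:
            return status
        time.sleep(interval)
        interval = min(interval * 2, max_interval)  # double, capped at the max
    raise TimeoutError(f"no terminal status within {max_wait_time}s")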
View file

@ -0,0 +1,506 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant"
},
{
"role": "user",
"content": "What is 2 + 2?"
},
{
"role": "assistant",
"content": "The answer to the equation 2 + 2 is 4."
},
{
"role": "user",
"content": "Tell me a short joke"
}
],
"max_tokens": 0,
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": "Why",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " did",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " scare",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": "crow",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " win",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " an",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " award",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": "?\n\n",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": "Because",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " he",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " was",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " outstanding",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " his",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " field",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": "!",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
}
}

View file

@ -0,0 +1,88 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b-instruct-fp16",
"created": 1760453641,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "qwen3:4b",
"created": 1757615302,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-oss:latest",
"created": 1756395223,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "nomic-embed-text:latest",
"created": 1756318548,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b",
"created": 1755191039,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:l6-v2",
"created": 1753968177,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:1b",
"created": 1746124735,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:latest",
"created": 1746044170,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
}
}

View file

@ -0,0 +1,44 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama-guard3:1b",
"created": 1753937098,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:l6-v2",
"created": 1753936935,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b-instruct-fp16",
"created": 1753936925,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -246,7 +246,7 @@ def instantiate_llama_stack_client(session):
        run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
        with open(run_config_file.name, "w") as f:
            yaml.dump(run_config.model_dump(), f)
            yaml.dump(run_config.model_dump(mode="json"), f)
        config = run_config_file.name

    client = LlamaStackAsLibraryClient(

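The mode="json" argument matters here because model_dump() alone keeps rich Python values (enums, paths) in the resulting dict, which PyYAML cannot represent. A minimal sketch with a hypothetical model, not taken from the llama-stack codebase:

from enum import Enum

import yaml
from pydantic import BaseModel


class Provider(Enum):
    OLLAMA = "ollama"


class DemoConfig(BaseModel):
    provider: Provider


cfg = DemoConfig(provider=Provider.OLLAMA)
print(yaml.dump(cfg.model_dump(mode="json")))  # provider: ollama
# yaml.dump(cfg.model_dump()) would raise a RepresenterError, because the
# raw dump still contains the Provider enum member rather than "ollama".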
View file

@ -12,9 +12,15 @@ import pytest

from llama_stack.core.access_control.access_control import default_policy
from llama_stack.core.datatypes import User
from llama_stack.core.storage.datatypes import SqlStoreReference
from llama_stack.providers.utils.sqlstore.api import ColumnType
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig, SqliteSqlStoreConfig, sqlstore_impl
from llama_stack.providers.utils.sqlstore.sqlstore import (
    PostgresSqlStoreConfig,
    SqliteSqlStoreConfig,
    register_sqlstore_backends,
    sqlstore_impl,
)


def get_postgres_config():
@ -55,8 +61,9 @@ def authorized_store(backend_config):
    config_func = backend_config

    config = config_func()

    base_sqlstore = sqlstore_impl(config)
    backend_name = f"sql_{type(config).__name__.lower()}"
    register_sqlstore_backends({backend_name: config})
    base_sqlstore = sqlstore_impl(SqlStoreReference(backend=backend_name, table_name="authorized_store"))
    authorized_store = AuthorizedSqlStore(base_sqlstore, default_policy())

    yield authorized_store
View file

@ -0,0 +1,95 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Telemetry test configuration using OpenTelemetry SDK exporters.

This conftest provides in-memory telemetry collection for library_client mode only.
Tests using these fixtures should skip in server mode since the in-memory collector
cannot access spans from a separate server process.
"""

from typing import Any

import opentelemetry.metrics as otel_metrics
import opentelemetry.trace as otel_trace
import pytest
from opentelemetry import metrics, trace
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module
from llama_stack.testing.api_recorder import patch_httpx_for_test_id
from tests.integration.fixtures.common import instantiate_llama_stack_client


class TestCollector:
    def __init__(self, span_exp, metric_read):
        assert span_exp and metric_read
        self.span_exporter = span_exp
        self.metric_reader = metric_read

    def get_spans(self) -> tuple[ReadableSpan, ...]:
        return self.span_exporter.get_finished_spans()

    def get_metrics(self) -> Any | None:
        metrics = self.metric_reader.get_metrics_data()
        if metrics and metrics.resource_metrics:
            return metrics.resource_metrics[0].scope_metrics[0].metrics
        return None

    def clear(self) -> None:
        self.span_exporter.clear()
        self.metric_reader.get_metrics_data()


@pytest.fixture(scope="session")
def _telemetry_providers():
    """Set up in-memory OTEL providers before llama_stack_client initializes."""
    # Reset set-once flags to allow re-initialization
    if hasattr(otel_trace, "_TRACER_PROVIDER_SET_ONCE"):
        otel_trace._TRACER_PROVIDER_SET_ONCE._done = False  # type: ignore
    if hasattr(otel_metrics, "_METER_PROVIDER_SET_ONCE"):
        otel_metrics._METER_PROVIDER_SET_ONCE._done = False  # type: ignore

    # Create in-memory exporters/readers
    span_exporter = InMemorySpanExporter()
    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))
    trace.set_tracer_provider(tracer_provider)

    metric_reader = InMemoryMetricReader()
    meter_provider = MeterProvider(metric_readers=[metric_reader])
    metrics.set_meter_provider(meter_provider)

    # Set module-level provider so TelemetryAdapter uses our in-memory providers
    telemetry_module._TRACER_PROVIDER = tracer_provider

    yield (span_exporter, metric_reader, tracer_provider, meter_provider)

    telemetry_module._TRACER_PROVIDER = None
    tracer_provider.shutdown()
    meter_provider.shutdown()


@pytest.fixture(scope="session")
def llama_stack_client(_telemetry_providers, request):
    """Override llama_stack_client to ensure in-memory telemetry providers are used."""
    patch_httpx_for_test_id()
    client = instantiate_llama_stack_client(request.session)
    return client


@pytest.fixture
def mock_otlp_collector(_telemetry_providers):
    """Provides access to telemetry data and clears between tests."""
    span_exporter, metric_reader, _, _ = _telemetry_providers
    collector = TestCollector(span_exporter, metric_reader)
    yield collector
    collector.clear()
View file

@ -0,0 +1,57 @@
{
"test_id": "tests/integration/telemetry/test_openai_telemetry.py::test_openai_completion_creates_telemetry[txt=ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test OpenAI telemetry creation"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-0de60cd6a6ec",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "I'm happy to help you with setting up and testing OpenAI's telemetry creation.\n\nOpenAI provides a feature called \"Telemetry\" which allows developers to collect data about their users' interactions with the model. To test this feature, we need to create a simple application that uses the OpenAI API and sends telemetry data to their servers.\n\nHere's an example code in Python that demonstrates how to create a simple telemetry creator:\n\n```python\nimport os\nfrom openai.api import API\n\n# Initialize the OpenAI API client\napi = API(os.environ['OPENAI_API_KEY'])\n\ndef create_user():\n # Create a new user entity\n user_entity = {\n 'id': 'user-123',\n 'name': 'John Doe',\n 'email': 'john.doe@example.com'\n }\n \n # Send the user creation request to OpenAI\n response = api.users.create(user_entity)\n print(f\"User created: {response}\")\n\ndef create_transaction():\n # Create a new transaction entity\n transaction_entity = {\n 'id': 'tran-123',\n 'user_id': 'user-123',\n 'transaction_type': 'query'\n }\n \n # Send the transaction creation request to OpenAI\n response = api.transactions.create(transaction_entity)\n print(f\"Transaction created: {response}\")\n\ndef send_telemetry_data():\n # Create a new telemetry event entity\n telemetry_event_entity = {\n 'id': 'telem-123',\n 'transaction_id': 'tran-123',\n 'data': '{ \"event\": \"test\", \"user_id\": 1 }'\n }\n \n # Send the telemetry data to OpenAI\n response = api.telemetry.create(telemetry_event_entity)\n print(f\"Telemetry event sent: {response}\")\n\n# Test the telemetry creation\ncreate_user()\ncreate_transaction()\nsend_telemetry_data()\n```\n\nMake sure you replace `OPENAI_API_KEY` with your actual API key. Also, ensure that you have the OpenAI API client library installed by running `pip install openai`.\n\nOnce you've created the test code, run it and observe the behavior of the telemetry creation process.\n\nPlease let me know if you need further modifications or assistance!",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 460,
"prompt_tokens": 30,
"total_tokens": 490,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,59 @@
{
"test_id": "tests/integration/telemetry/test_completions.py::test_telemetry_format_completeness[txt=ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace openai with temperature 0.7"
}
],
"max_tokens": 100,
"stream": false,
"temperature": 0.7
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-1fcfd86d8111",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"message": {
"content": "import torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Load the pre-trained model and tokenizer\nmodel_name = \"CompVis/transformers-base-uncased\"\nmodel = AutoModelForCausalLM.from_pretrained(model_name)\ntokenizer = AutoTokenizer.from_pretrained(model_name)\n\n# Set the temperature to 0.7\ntemperature = 0.7\n\n# Define a function to generate text\ndef generate_text(prompt, max_length=100):\n input",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 100,
"prompt_tokens": 35,
"total_tokens": 135,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,59 @@
{
"test_id": "tests/integration/telemetry/test_completions.py::test_telemetry_format_completeness[txt=llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://localhost:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace openai with temperature 0.7"
}
],
"max_tokens": 100,
"stream": false,
"temperature": 0.7
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-dba5042d6691",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"message": {
"content": "To test the \"trace\" functionality of OpenAI's GPT-4 model at a temperature of 0.7, you can follow these steps:\n\n1. First, make sure you have an account with OpenAI and have been granted access to their API.\n\n2. You will need to install the `transformers` library, which is the official library for working with Transformers models like GPT-4:\n\n ```bash\npip install transformers\n```\n\n3. Next, import the necessary",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 100,
"prompt_tokens": 35,
"total_tokens": 135,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,112 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Telemetry tests verifying @trace_protocol decorator format using in-memory exporter."""
import json
import os
import pytest
pytestmark = pytest.mark.skipif(
os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE") == "server",
reason="In-memory telemetry tests only work in library_client mode (server mode runs in separate process)",
)
def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id):
"""Verify streaming adds chunk_count and __type__=async_generator."""
stream = llama_stack_client.chat.completions.create(
model=text_model_id,
messages=[{"role": "user", "content": "Test trace openai 1"}],
stream=True,
)
chunks = list(stream)
assert len(chunks) > 0
spans = mock_otlp_collector.get_spans()
assert len(spans) > 0
chunk_count = None
for span in spans:
if span.attributes.get("__type__") == "async_generator":
chunk_count = span.attributes.get("chunk_count")
if chunk_count:
chunk_count = int(chunk_count)
break
assert chunk_count is not None
assert chunk_count == len(chunks)
def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client, text_model_id):
"""Comprehensive validation of telemetry data format including spans and metrics."""
response = llama_stack_client.chat.completions.create(
model=text_model_id,
messages=[{"role": "user", "content": "Test trace openai with temperature 0.7"}],
temperature=0.7,
max_tokens=100,
stream=False,
)
# Handle both dict and Pydantic model for usage
# This occurs due to the replay system returning a dict for usage, but the client returning a Pydantic model
# TODO: Fix this by making the replay system return a Pydantic model for usage
usage = response.usage if isinstance(response.usage, dict) else response.usage.model_dump()
assert usage.get("prompt_tokens") and usage["prompt_tokens"] > 0
assert usage.get("completion_tokens") and usage["completion_tokens"] > 0
assert usage.get("total_tokens") and usage["total_tokens"] > 0
# Verify spans
spans = mock_otlp_collector.get_spans()
assert len(spans) == 5
# we only need this captured one time
logged_model_id = None
for span in spans:
attrs = span.attributes
assert attrs is not None
# Root span is created manually by tracing middleware, not by @trace_protocol decorator
is_root_span = attrs.get("__root__") is True
if is_root_span:
# Root spans have different attributes
assert attrs.get("__location__") in ["library_client", "server"]
else:
# Non-root spans are created by @trace_protocol decorator
assert attrs.get("__autotraced__")
assert attrs.get("__class__") and attrs.get("__method__")
assert attrs.get("__type__") in ["async", "sync", "async_generator"]
args = json.loads(attrs["__args__"])
if "model_id" in args:
logged_model_id = args["model_id"]
assert logged_model_id is not None
assert logged_model_id == text_model_id
# TODO: re-enable this once metrics get fixed
"""
# Verify token usage metrics in response
metrics = mock_otlp_collector.get_metrics()
assert metrics
for metric in metrics:
assert metric.name in ["completion_tokens", "total_tokens", "prompt_tokens"]
assert metric.unit == "tokens"
assert metric.data.data_points and len(metric.data.data_points) == 1
match metric.name:
case "completion_tokens":
assert metric.data.data_points[0].value == usage["completion_tokens"]
case "total_tokens":
assert metric.data.data_points[0].value == usage["total_tokens"]
case "prompt_tokens":
assert metric.data.data_points[0].value == usage["prompt_tokens"
"""

View file

@ -0,0 +1,71 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import yaml
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.storage.datatypes import (
PostgresKVStoreConfig,
PostgresSqlStoreConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
)
def test_starter_distribution_config_loads_and_resolves():
"""Integration: Actual starter config should parse and have correct storage structure."""
with open("llama_stack/distributions/starter/run.yaml") as f:
config_dict = yaml.safe_load(f)
config = StackRunConfig(**config_dict)
# Config should have named backends and explicit store references
assert config.storage is not None
assert "kv_default" in config.storage.backends
assert "sql_default" in config.storage.backends
assert isinstance(config.storage.backends["kv_default"], SqliteKVStoreConfig)
assert isinstance(config.storage.backends["sql_default"], SqliteSqlStoreConfig)
stores = config.storage.stores
assert stores.metadata is not None
assert stores.metadata.backend == "kv_default"
assert stores.metadata.namespace == "registry"
assert stores.inference is not None
assert stores.inference.backend == "sql_default"
assert stores.inference.table_name == "inference_store"
assert stores.inference.max_write_queue_size > 0
assert stores.inference.num_writers > 0
assert stores.conversations is not None
assert stores.conversations.backend == "sql_default"
assert stores.conversations.table_name == "openai_conversations"
def test_postgres_demo_distribution_config_loads():
"""Integration: Postgres demo should use Postgres backend for all stores."""
with open("llama_stack/distributions/postgres-demo/run.yaml") as f:
config_dict = yaml.safe_load(f)
config = StackRunConfig(**config_dict)
# Should have postgres backend
assert config.storage is not None
assert "kv_default" in config.storage.backends
assert "sql_default" in config.storage.backends
postgres_backend = config.storage.backends["sql_default"]
assert isinstance(postgres_backend, PostgresSqlStoreConfig)
assert postgres_backend.host == "${env.POSTGRES_HOST:=localhost}"
kv_backend = config.storage.backends["kv_default"]
assert isinstance(kv_backend, PostgresKVStoreConfig)
stores = config.storage.stores
# Stores target the Postgres backends explicitly
assert stores.metadata is not None
assert stores.metadata.backend == "kv_default"
assert stores.inference is not None
assert stores.inference.backend == "sql_default"
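
For reference, a hedged sketch of the programmatic equivalent of the `storage:` YAML section these tests load, built from the same datatypes used across the new unit tests; the db paths are illustrative only:

```python
from llama_stack.core.storage.datatypes import (
    InferenceStoreReference,
    KVStoreReference,
    ServerStoresConfig,
    SqliteKVStoreConfig,
    SqliteSqlStoreConfig,
    SqlStoreReference,
    StorageConfig,
)

# Named backends, plus explicit store references that target them by name.
storage = StorageConfig(
    backends={
        "kv_default": SqliteKVStoreConfig(db_path="/tmp/kvstore.db"),
        "sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql_store.db"),
    },
    stores=ServerStoresConfig(
        metadata=KVStoreReference(backend="kv_default", namespace="registry"),
        inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
        conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
    ),
)
```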

View file

@ -23,6 +23,27 @@ def config_with_image_name_int():
image_name: 1234
apis_to_serve: []
built_at: {datetime.now().isoformat()}
storage:
backends:
kv_default:
type: kv_sqlite
db_path: /tmp/test_kv.db
sql_default:
type: sql_sqlite
db_path: /tmp/test_sql.db
stores:
metadata:
backend: kv_default
namespace: metadata
inference:
backend: sql_default
table_name: inference
conversations:
backend: sql_default
table_name: conversations
responses:
backend: sql_default
table_name: responses
providers:
inference:
- provider_id: provider1
@ -54,6 +75,27 @@ def up_to_date_config():
image_name: foo
apis_to_serve: []
built_at: {datetime.now().isoformat()}
storage:
backends:
kv_default:
type: kv_sqlite
db_path: /tmp/test_kv.db
sql_default:
type: sql_sqlite
db_path: /tmp/test_sql.db
stores:
metadata:
backend: kv_default
namespace: metadata
inference:
backend: sql_default
table_name: inference
conversations:
backend: sql_default
table_name: conversations
responses:
backend: sql_default
table_name: responses
providers:
inference:
- provider_id: provider1

View file

@ -20,7 +20,14 @@ from llama_stack.core.conversations.conversations import (
ConversationServiceConfig,
ConversationServiceImpl,
)
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.storage.datatypes import (
ServerStoresConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageConfig,
)
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
@pytest.fixture
@ -28,7 +35,18 @@ async def service():
with tempfile.TemporaryDirectory() as tmpdir:
db_path = Path(tmpdir) / "test_conversations.db"
config = ConversationServiceConfig(conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=[])
storage = StorageConfig(
backends={
"sql_test": SqliteSqlStoreConfig(db_path=str(db_path)),
},
stores=ServerStoresConfig(
conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"),
),
)
register_sqlstore_backends({"sql_test": storage.backends["sql_test"]})
run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage)
config = ConversationServiceConfig(run_config=run_config, policy=[])
service = ConversationServiceImpl(config, {})
await service.initialize()
yield service
@ -121,9 +139,18 @@ async def test_policy_configuration():
AccessRule(forbid=Scope(principal="test_user", actions=[Action.CREATE, Action.READ], resource="*"))
]
config = ConversationServiceConfig(
conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=restrictive_policy
storage = StorageConfig(
backends={
"sql_test": SqliteSqlStoreConfig(db_path=str(db_path)),
},
stores=ServerStoresConfig(
conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"),
),
)
register_sqlstore_backends({"sql_test": storage.backends["sql_test"]})
run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage)
config = ConversationServiceConfig(run_config=run_config, policy=restrictive_policy)
service = ConversationServiceImpl(config, {})
await service.initialize()

View file

@ -0,0 +1,84 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Unit tests for storage backend/reference validation."""
import pytest
from pydantic import ValidationError
from llama_stack.core.datatypes import (
LLAMA_STACK_RUN_CONFIG_VERSION,
StackRunConfig,
)
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageConfig,
)
def _base_run_config(**overrides):
metadata_reference = overrides.pop(
"metadata_reference",
KVStoreReference(backend="kv_default", namespace="registry"),
)
inference_reference = overrides.pop(
"inference_reference",
InferenceStoreReference(backend="sql_default", table_name="inference"),
)
conversations_reference = overrides.pop(
"conversations_reference",
SqlStoreReference(backend="sql_default", table_name="conversations"),
)
storage = overrides.pop(
"storage",
StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(db_path="/tmp/kv.db"),
"sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql.db"),
},
stores=ServerStoresConfig(
metadata=metadata_reference,
inference=inference_reference,
conversations=conversations_reference,
),
),
)
return StackRunConfig(
version=LLAMA_STACK_RUN_CONFIG_VERSION,
image_name="test-distro",
apis=[],
providers={},
storage=storage,
**overrides,
)
def test_references_require_known_backend():
with pytest.raises(ValidationError, match="unknown backend 'missing'"):
_base_run_config(metadata_reference=KVStoreReference(backend="missing", namespace="registry"))
def test_references_must_match_backend_family():
with pytest.raises(ValidationError, match="kv_.* is required"):
_base_run_config(metadata_reference=KVStoreReference(backend="sql_default", namespace="registry"))
with pytest.raises(ValidationError, match="sql_.* is required"):
_base_run_config(
inference_reference=InferenceStoreReference(backend="kv_default", table_name="inference"),
)
def test_valid_configuration_passes_validation():
config = _base_run_config()
stores = config.storage.stores
assert stores.metadata is not None and stores.metadata.backend == "kv_default"
assert stores.inference is not None and stores.inference.backend == "sql_default"
assert stores.conversations is not None and stores.conversations.backend == "sql_default"

View file

@ -13,6 +13,15 @@ from pydantic import BaseModel, Field, ValidationError
from llama_stack.core.datatypes import Api, Provider, StackRunConfig
from llama_stack.core.distribution import INTERNAL_APIS, get_provider_registry, providable_apis
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageConfig,
)
from llama_stack.providers.datatypes import ProviderSpec
@ -29,6 +38,32 @@ class SampleConfig(BaseModel):
}
def _default_storage() -> StorageConfig:
return StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(db_path=":memory:"),
"sql_default": SqliteSqlStoreConfig(db_path=":memory:"),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
conversations=SqlStoreReference(backend="sql_default", table_name="conversations"),
),
)
def make_stack_config(**overrides) -> StackRunConfig:
storage = overrides.pop("storage", _default_storage())
defaults = dict(
image_name="test_image",
apis=[],
providers={},
storage=storage,
)
defaults.update(overrides)
return StackRunConfig(**defaults)
@pytest.fixture
def mock_providers():
"""Mock the available_providers function to return test providers."""
@ -47,8 +82,8 @@ def mock_providers():
@pytest.fixture
def base_config(tmp_path):
"""Create a base StackRunConfig with common settings."""
return StackRunConfig(
image_name="test_image",
return make_stack_config(
apis=["inference"],
providers={
"inference": [
Provider(
@ -222,8 +257,8 @@ class TestProviderRegistry:
def test_missing_directory(self, mock_providers):
"""Test handling of missing external providers directory."""
config = StackRunConfig(
image_name="test_image",
config = make_stack_config(
apis=["inference"],
providers={
"inference": [
Provider(
@ -278,7 +313,6 @@ pip_packages:
"""Test loading an external provider from a module (success path)."""
from types import SimpleNamespace
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.providers.datatypes import Api, ProviderSpec
# Simulate a provider module with get_provider_spec
@ -293,7 +327,7 @@ pip_packages:
import_module_side_effect = make_import_module_side_effect(external_module=fake_module)
with patch("importlib.import_module", side_effect=import_module_side_effect) as mock_import:
config = StackRunConfig(
config = make_stack_config(
image_name="test_image",
providers={
"inference": [
@ -317,12 +351,11 @@ pip_packages:
def test_external_provider_from_module_not_found(self, mock_providers):
"""Test handling ModuleNotFoundError for missing provider module."""
from llama_stack.core.datatypes import Provider, StackRunConfig
import_module_side_effect = make_import_module_side_effect(raise_for_external=True)
with patch("importlib.import_module", side_effect=import_module_side_effect):
config = StackRunConfig(
config = make_stack_config(
image_name="test_image",
providers={
"inference": [
@ -341,12 +374,11 @@ pip_packages:
def test_external_provider_from_module_missing_get_provider_spec(self, mock_providers):
"""Test handling missing get_provider_spec in provider module (should raise ValueError)."""
from llama_stack.core.datatypes import Provider, StackRunConfig
import_module_side_effect = make_import_module_side_effect(missing_get_provider_spec=True)
with patch("importlib.import_module", side_effect=import_module_side_effect):
config = StackRunConfig(
config = make_stack_config(
image_name="test_image",
providers={
"inference": [
@ -399,13 +431,12 @@ class TestGetExternalProvidersFromModule:
def test_stackrunconfig_provider_without_module(self, mock_providers):
"""Test that providers without module attribute are skipped."""
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.core.distribution import get_external_providers_from_module
import_module_side_effect = make_import_module_side_effect()
with patch("importlib.import_module", side_effect=import_module_side_effect):
config = StackRunConfig(
config = make_stack_config(
image_name="test_image",
providers={
"inference": [
@ -426,7 +457,6 @@ class TestGetExternalProvidersFromModule:
"""Test provider with module containing version spec (e.g., package==1.0.0)."""
from types import SimpleNamespace
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.core.distribution import get_external_providers_from_module
from llama_stack.providers.datatypes import ProviderSpec
@ -444,7 +474,7 @@ class TestGetExternalProvidersFromModule:
raise ModuleNotFoundError(name)
with patch("importlib.import_module", side_effect=import_side_effect):
config = StackRunConfig(
config = make_stack_config(
image_name="test_image",
providers={
"inference": [
@ -564,7 +594,6 @@ class TestGetExternalProvidersFromModule:
"""Test when get_provider_spec returns a list of specs."""
from types import SimpleNamespace
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.core.distribution import get_external_providers_from_module
from llama_stack.providers.datatypes import ProviderSpec
@ -589,7 +618,7 @@ class TestGetExternalProvidersFromModule:
raise ModuleNotFoundError(name)
with patch("importlib.import_module", side_effect=import_side_effect):
config = StackRunConfig(
config = make_stack_config(
image_name="test_image",
providers={
"inference": [
@ -613,7 +642,6 @@ class TestGetExternalProvidersFromModule:
"""Test that list return filters specs by provider_type."""
from types import SimpleNamespace
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.core.distribution import get_external_providers_from_module
from llama_stack.providers.datatypes import ProviderSpec
@ -638,7 +666,7 @@ class TestGetExternalProvidersFromModule:
raise ModuleNotFoundError(name)
with patch("importlib.import_module", side_effect=import_side_effect):
config = StackRunConfig(
config = make_stack_config(
image_name="test_image",
providers={
"inference": [
@ -662,7 +690,6 @@ class TestGetExternalProvidersFromModule:
"""Test that list return adds multiple different provider_types when config requests them."""
from types import SimpleNamespace
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.core.distribution import get_external_providers_from_module
from llama_stack.providers.datatypes import ProviderSpec
@ -688,7 +715,7 @@ class TestGetExternalProvidersFromModule:
raise ModuleNotFoundError(name)
with patch("importlib.import_module", side_effect=import_side_effect):
config = StackRunConfig(
config = make_stack_config(
image_name="test_image",
providers={
"inference": [
@ -718,7 +745,6 @@ class TestGetExternalProvidersFromModule:
def test_module_not_found_raises_value_error(self, mock_providers):
"""Test that ModuleNotFoundError raises ValueError with helpful message."""
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.core.distribution import get_external_providers_from_module
def import_side_effect(name):
@ -727,7 +753,7 @@ class TestGetExternalProvidersFromModule:
raise ModuleNotFoundError(name)
with patch("importlib.import_module", side_effect=import_side_effect):
config = StackRunConfig(
config = make_stack_config(
image_name="test_image",
providers={
"inference": [
@ -751,7 +777,6 @@ class TestGetExternalProvidersFromModule:
"""Test that generic exceptions are properly raised."""
from types import SimpleNamespace
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.core.distribution import get_external_providers_from_module
def bad_spec():
@ -765,7 +790,7 @@ class TestGetExternalProvidersFromModule:
raise ModuleNotFoundError(name)
with patch("importlib.import_module", side_effect=import_side_effect):
config = StackRunConfig(
config = make_stack_config(
image_name="test_image",
providers={
"inference": [
@ -787,10 +812,9 @@ class TestGetExternalProvidersFromModule:
def test_empty_provider_list(self, mock_providers):
"""Test with empty provider list."""
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.distribution import get_external_providers_from_module
config = StackRunConfig(
config = make_stack_config(
image_name="test_image",
providers={},
)
@ -805,7 +829,6 @@ class TestGetExternalProvidersFromModule:
"""Test multiple APIs with providers."""
from types import SimpleNamespace
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.core.distribution import get_external_providers_from_module
from llama_stack.providers.datatypes import ProviderSpec
@ -830,7 +853,7 @@ class TestGetExternalProvidersFromModule:
raise ModuleNotFoundError(name)
with patch("importlib.import_module", side_effect=import_side_effect):
config = StackRunConfig(
config = make_stack_config(
image_name="test_image",
providers={
"inference": [

View file

@ -0,0 +1,50 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
from io import StringIO
from unittest.mock import patch
from llama_stack.cli.stack._list_deps import (
run_stack_list_deps_command,
)
def test_stack_list_deps_basic():
args = argparse.Namespace(
config=None,
env_name="test-env",
providers="inference=remote::ollama",
format="deps-only",
)
with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
run_stack_list_deps_command(args)
output = mock_stdout.getvalue()
# deps-only format should NOT include "uv pip install" or "Dependencies for"
assert "uv pip install" not in output
assert "Dependencies for" not in output
# Check that expected dependencies are present
assert "ollama" in output
assert "aiohttp" in output
assert "fastapi" in output
def test_stack_list_deps_with_distro_uv():
args = argparse.Namespace(
config="starter",
env_name=None,
providers=None,
format="uv",
)
with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
run_stack_list_deps_command(args)
output = mock_stdout.getvalue()
assert "uv pip install" in output

View file

@ -11,11 +11,12 @@ from llama_stack.apis.common.errors import ResourceNotFoundError
from llama_stack.apis.common.responses import Order
from llama_stack.apis.files import OpenAIFilePurpose
from llama_stack.core.access_control.access_control import default_policy
from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference
from llama_stack.providers.inline.files.localfs import (
LocalfsFilesImpl,
LocalfsFilesImplConfig,
)
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
class MockUploadFile:
@ -36,8 +37,11 @@ async def files_provider(tmp_path):
storage_dir = tmp_path / "files"
db_path = tmp_path / "files_metadata.db"
backend_name = "sql_localfs_test"
register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path.as_posix())})
config = LocalfsFilesImplConfig(
storage_dir=storage_dir.as_posix(), metadata_store=SqliteSqlStoreConfig(db_path=db_path.as_posix())
storage_dir=storage_dir.as_posix(),
metadata_store=SqlStoreReference(backend=backend_name, table_name="files_metadata"),
)
provider = LocalfsFilesImpl(config, default_policy())

View file

@ -9,7 +9,16 @@ import random
import pytest
from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageConfig,
)
from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends
@pytest.fixture
@ -19,12 +28,28 @@ async def temp_prompt_store(tmp_path_factory):
db_path = str(temp_dir / f"{unique_id}.db")
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
mock_run_config = StackRunConfig(image_name="test-distribution", apis=[], providers={})
storage = StorageConfig(
backends={
"kv_test": SqliteKVStoreConfig(db_path=db_path),
"sql_test": SqliteSqlStoreConfig(db_path=str(temp_dir / f"{unique_id}_sql.db")),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(backend="kv_test", namespace="registry"),
inference=InferenceStoreReference(backend="sql_test", table_name="inference"),
conversations=SqlStoreReference(backend="sql_test", table_name="conversations"),
),
)
mock_run_config = StackRunConfig(
image_name="test-distribution",
apis=[],
providers={},
storage=storage,
)
config = PromptServiceConfig(run_config=mock_run_config)
store = PromptServiceImpl(config, deps={})
store.kvstore = await kvstore_impl(SqliteKVStoreConfig(db_path=db_path))
register_kvstore_backends({"kv_test": storage.backends["kv_test"]})
store.kvstore = await kvstore_impl(KVStoreReference(backend="kv_test", namespace="prompts"))
yield store
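
The fixture above follows the new two-step persistence pattern: register a named backend, then resolve a reference against it. A minimal sketch using the same imports as this file; the function and backend names are illustrative:

```python
from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends

async def open_prompts_kvstore(db_path: str):
    # A backend must be registered under a name before a reference to it can resolve.
    register_kvstore_backends({"kv_test": SqliteKVStoreConfig(db_path=db_path)})
    # kvstore_impl now takes a KVStoreReference instead of a raw backend config.
    return await kvstore_impl(KVStoreReference(backend="kv_test", namespace="prompts"))
```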

View file

@ -26,6 +26,20 @@ from llama_stack.providers.inline.agents.meta_reference.config import MetaRefere
from llama_stack.providers.inline.agents.meta_reference.persistence import AgentInfo
@pytest.fixture(autouse=True)
def setup_backends(tmp_path):
"""Register KV and SQL store backends for testing."""
from llama_stack.core.storage.datatypes import SqliteKVStoreConfig, SqliteSqlStoreConfig
from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
kv_path = str(tmp_path / "test_kv.db")
sql_path = str(tmp_path / "test_sql.db")
register_kvstore_backends({"kv_default": SqliteKVStoreConfig(db_path=kv_path)})
register_sqlstore_backends({"sql_default": SqliteSqlStoreConfig(db_path=sql_path)})
@pytest.fixture
def mock_apis():
return {
@ -40,15 +54,20 @@ def mock_apis():
@pytest.fixture
def config(tmp_path):
from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
from llama_stack.providers.inline.agents.meta_reference.config import AgentPersistenceConfig
return MetaReferenceAgentsImplConfig(
persistence_store={
"type": "sqlite",
"db_path": str(tmp_path / "test.db"),
},
responses_store={
"type": "sqlite",
"db_path": str(tmp_path / "test.db"),
},
persistence=AgentPersistenceConfig(
agent_state=KVStoreReference(
backend="kv_default",
namespace="agents",
),
responses=ResponsesStoreReference(
backend="sql_default",
table_name="responses",
),
)
)

View file

@ -42,7 +42,7 @@ from llama_stack.apis.inference import (
)
from llama_stack.apis.tools.tools import ListToolDefsResponse, ToolDef, ToolGroups, ToolInvocationResult, ToolRuntime
from llama_stack.core.access_control.access_control import default_policy
from llama_stack.core.datatypes import ResponsesStoreConfig
from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig
from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
OpenAIResponsesImpl,
)
@ -50,7 +50,7 @@ from llama_stack.providers.utils.responses.responses_store import (
ResponsesStore,
_OpenAIResponseObjectWithInputAndMessages,
)
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture
@ -814,6 +814,69 @@ async def test_create_openai_response_with_instructions_and_previous_response(
assert sent_messages[3].content == "Which is the largest?"
async def test_create_openai_response_with_previous_response_instructions(
openai_responses_impl, mock_responses_store, mock_inference_api
):
"""Test prepending instructions and previous response with instructions."""
input_item_message = OpenAIResponseMessage(
id="123",
content="Name some towns in Ireland",
role="user",
)
response_output_message = OpenAIResponseMessage(
id="123",
content="Galway, Longford, Sligo",
status="completed",
role="assistant",
)
response = _OpenAIResponseObjectWithInputAndMessages(
created_at=1,
id="resp_123",
model="fake_model",
output=[response_output_message],
status="completed",
text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")),
input=[input_item_message],
messages=[
OpenAIUserMessageParam(content="Name some towns in Ireland"),
OpenAIAssistantMessageParam(content="Galway, Longford, Sligo"),
],
instructions="You are a helpful assistant.",
)
mock_responses_store.get_response_object.return_value = response
model = "meta-llama/Llama-3.1-8B-Instruct"
instructions = "You are a geography expert. Provide concise answers."
mock_inference_api.openai_chat_completion.return_value = fake_stream()
# Execute
await openai_responses_impl.create_openai_response(
input="Which is the largest?", model=model, instructions=instructions, previous_response_id="123"
)
# Verify
mock_inference_api.openai_chat_completion.assert_called_once()
call_args = mock_inference_api.openai_chat_completion.call_args
params = call_args.args[0]
sent_messages = params.messages
# Check that instructions were prepended as a system message
# and that the previous response instructions were not carried over
assert len(sent_messages) == 4, sent_messages
assert sent_messages[0].role == "system"
assert sent_messages[0].content == instructions
# Check the rest of the messages were converted correctly
assert sent_messages[1].role == "user"
assert sent_messages[1].content == "Name some towns in Ireland"
assert sent_messages[2].role == "assistant"
assert sent_messages[2].content == "Galway, Longford, Sligo"
assert sent_messages[3].role == "user"
assert sent_messages[3].content == "Which is the largest?"
async def test_list_openai_response_input_items_delegation(openai_responses_impl, mock_responses_store):
"""Test that list_openai_response_input_items properly delegates to responses_store with correct parameters."""
# Setup
@ -854,8 +917,10 @@ async def test_responses_store_list_input_items_logic():
# Create mock store and response store
mock_sql_store = AsyncMock()
backend_name = "sql_responses_test"
register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path="mock_db_path")})
responses_store = ResponsesStore(
ResponsesStoreConfig(sql_store_config=SqliteSqlStoreConfig(db_path="mock_db_path")), policy=default_policy()
ResponsesStoreReference(backend=backend_name, table_name="responses"), policy=default_policy()
)
responses_store.sql_store = mock_sql_store

View file

@ -12,10 +12,10 @@ from unittest.mock import AsyncMock
import pytest
from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
from llama_stack.providers.inline.batches.reference.batches import ReferenceBatchesImpl
from llama_stack.providers.inline.batches.reference.config import ReferenceBatchesImplConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends
@pytest.fixture
@ -23,8 +23,10 @@ async def provider():
"""Create a test provider instance with temporary database."""
with tempfile.TemporaryDirectory() as tmpdir:
db_path = Path(tmpdir) / "test_batches.db"
backend_name = "kv_batches_test"
kvstore_config = SqliteKVStoreConfig(db_path=str(db_path))
config = ReferenceBatchesImplConfig(kvstore=kvstore_config)
register_kvstore_backends({backend_name: kvstore_config})
config = ReferenceBatchesImplConfig(kvstore=KVStoreReference(backend=backend_name, namespace="batches"))
# Create kvstore and mock APIs
kvstore = await kvstore_impl(config.kvstore)

View file

@ -8,8 +8,9 @@ import boto3
import pytest
from moto import mock_aws
from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference
from llama_stack.providers.remote.files.s3 import S3FilesImplConfig, get_adapter_impl
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
class MockUploadFile:
@ -38,11 +39,13 @@ def sample_text_file2():
def s3_config(tmp_path):
db_path = tmp_path / "s3_files_metadata.db"
backend_name = f"sql_s3_{tmp_path.name}"
register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path.as_posix())})
return S3FilesImplConfig(
bucket_name=f"test-bucket-{tmp_path.name}",
region="not-a-region",
auto_create_bucket=True,
metadata_store=SqliteSqlStoreConfig(db_path=db_path.as_posix()),
metadata_store=SqlStoreReference(backend=backend_name, table_name="s3_files_metadata"),
)

View file

@ -12,13 +12,14 @@ import pytest
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter
from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig
from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter
from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig
from llama_stack.providers.remote.vector_io.pgvector.pgvector import PGVectorIndex, PGVectorVectorIOAdapter
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore import register_kvstore_backends
EMBEDDING_DIMENSION = 768
COLLECTION_PREFIX = "test_collection"
@ -112,8 +113,9 @@ async def unique_kvstore_config(tmp_path_factory):
unique_id = f"test_kv_{np.random.randint(1e6)}"
temp_dir = tmp_path_factory.getbasetemp()
db_path = str(temp_dir / f"{unique_id}.db")
return SqliteKVStoreConfig(db_path=db_path)
backend_name = f"kv_vector_{unique_id}"
register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=db_path)})
return KVStoreReference(backend=backend_name, namespace=f"vector_io::{unique_id}")
@pytest.fixture(scope="session")
@ -138,7 +140,7 @@ async def sqlite_vec_vec_index(embedding_dimension, tmp_path_factory):
async def sqlite_vec_adapter(sqlite_vec_db_path, unique_kvstore_config, mock_inference_api, embedding_dimension):
config = SQLiteVectorIOConfig(
db_path=sqlite_vec_db_path,
kvstore=unique_kvstore_config,
persistence=unique_kvstore_config,
)
adapter = SQLiteVecVectorIOAdapter(
config=config,
@ -176,7 +178,7 @@ async def faiss_vec_index(embedding_dimension):
@pytest.fixture
async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding_dimension):
config = FaissVectorIOConfig(
kvstore=unique_kvstore_config,
persistence=unique_kvstore_config,
)
adapter = FaissVectorIOAdapter(
config=config,
@ -251,7 +253,7 @@ async def pgvector_vec_adapter(unique_kvstore_config, mock_inference_api, embedd
db="test_db",
user="test_user",
password="test_password",
kvstore=unique_kvstore_config,
persistence=unique_kvstore_config,
)
adapter = PGVectorVectorIOAdapter(config, mock_inference_api, None)
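
These fixtures also pick up the `kvstore=` to `persistence=` rename: vector-IO configs now take a reference rather than a raw backend config. A hedged sketch using the Faiss config imported above; the backend name and path are illustrative:

```python
from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.utils.kvstore import register_kvstore_backends

register_kvstore_backends({"kv_vector_demo": SqliteKVStoreConfig(db_path="/tmp/vector_kv.db")})
config = FaissVectorIOConfig(
    persistence=KVStoreReference(backend="kv_vector_demo", namespace="vector_io::demo"),
)
```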

View file

@ -10,13 +10,13 @@ import pytest
from llama_stack.apis.inference import Model
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.core.datatypes import VectorDBWithOwner
from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
from llama_stack.core.store.registry import (
KEY_FORMAT,
CachedDiskDistributionRegistry,
DiskDistributionRegistry,
)
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends
@pytest.fixture
@ -72,7 +72,11 @@ async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db,
# Test cached version loads from disk
db_path = sqlite_kvstore.db_path
cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(SqliteKVStoreConfig(db_path=db_path)))
backend_name = "kv_cached_test"
register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=db_path)})
cached_registry = CachedDiskDistributionRegistry(
await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry"))
)
await cached_registry.initialize()
result_vector_db = await cached_registry.get("vector_db", "test_vector_db")
@ -101,7 +105,11 @@ async def test_cached_registry_updates(cached_disk_dist_registry):
# Verify persisted to disk
db_path = cached_disk_dist_registry.kvstore.db_path
new_registry = DiskDistributionRegistry(await kvstore_impl(SqliteKVStoreConfig(db_path=db_path)))
backend_name = "kv_cached_new"
register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=db_path)})
new_registry = DiskDistributionRegistry(
await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry"))
)
await new_registry.initialize()
result_vector_db = await new_registry.get("vector_db", "test_vector_db_2")
assert result_vector_db is not None

View file

@ -516,6 +516,82 @@ def test_get_attributes_from_claims():
assert set(attributes["teams"]) == {"my-team", "group1", "group2"}
assert attributes["namespaces"] == ["my-tenant"]
# Test nested claims with dot notation (e.g., Keycloak resource_access structure)
claims = {
"sub": "user123",
"resource_access": {"llamastack": {"roles": ["inference_max", "admin"]}, "other-client": {"roles": ["viewer"]}},
"realm_access": {"roles": ["offline_access", "uma_authorization"]},
}
attributes = get_attributes_from_claims(
claims, {"resource_access.llamastack.roles": "roles", "realm_access.roles": "realm_roles"}
)
assert set(attributes["roles"]) == {"inference_max", "admin"}
assert set(attributes["realm_roles"]) == {"offline_access", "uma_authorization"}
# Test that dot notation takes precedence over literal keys with dots
claims = {
"my.dotted.key": "literal-value",
"my": {"dotted": {"key": "nested-value"}},
}
attributes = get_attributes_from_claims(claims, {"my.dotted.key": "test"})
assert attributes["test"] == ["nested-value"]
# Test that literal key works when nested traversal doesn't exist
claims = {
"my.dotted.key": "literal-value",
}
attributes = get_attributes_from_claims(claims, {"my.dotted.key": "test"})
assert attributes["test"] == ["literal-value"]
# Test missing nested paths are handled gracefully
claims = {
"sub": "user123",
"resource_access": {"other-client": {"roles": ["viewer"]}},
}
attributes = get_attributes_from_claims(
claims,
{
"resource_access.llamastack.roles": "roles", # Missing nested path
"resource_access.missing.key": "missing_attr", # Missing nested path
"completely.missing.path": "another_missing", # Completely missing
"sub": "username", # Existing path
},
)
# Only the existing claim should be in attributes
assert attributes["username"] == ["user123"]
assert "roles" not in attributes
assert "missing_attr" not in attributes
assert "another_missing" not in attributes
# Test mixture of flat and nested claims paths
claims = {
"sub": "user456",
"flat_key": "flat-value",
"scope": "read write admin",
"resource_access": {"app1": {"roles": ["role1", "role2"]}, "app2": {"roles": ["role3"]}},
"groups": ["group1", "group2"],
"metadata": {"tenant": "tenant1", "region": "us-west"},
}
attributes = get_attributes_from_claims(
claims,
{
"sub": "user_id", # Flat string
"scope": "permissions", # Flat string with spaces
"groups": "teams", # Flat list
"resource_access.app1.roles": "app1_roles", # Nested list
"resource_access.app2.roles": "app2_roles", # Nested list
"metadata.tenant": "tenant", # Nested string
"metadata.region": "region", # Nested string
},
)
assert attributes["user_id"] == ["user456"]
assert set(attributes["permissions"]) == {"read", "write", "admin"}
assert set(attributes["teams"]) == {"group1", "group2"}
assert set(attributes["app1_roles"]) == {"role1", "role2"}
assert attributes["app2_roles"] == ["role3"]
assert attributes["tenant"] == ["tenant1"]
assert attributes["region"] == ["us-west"]
# TODO: add more tests for oauth2 token provider
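
Taken together, the assertions above pin down the dot-notation semantics: nested traversal takes precedence over a literal dotted key, the literal key is the fallback, and unresolvable paths are skipped. A sketch of those semantics only, not the actual implementation in llama_stack:

```python
def resolve_claim(claims: dict, path: str):
    # Prefer nested traversal: "a.b.c" walks nested dicts step by step ...
    node = claims
    for part in path.split("."):
        if isinstance(node, dict) and part in node:
            node = node[part]
        else:
            # ... falling back to a literal dotted key when traversal fails.
            return claims.get(path)
    return node
```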

View file

@ -4,6 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from uuid import uuid4
import pytest
from fastapi import FastAPI, Request
from fastapi.testclient import TestClient
@ -11,7 +13,8 @@ from starlette.middleware.base import BaseHTTPMiddleware
from llama_stack.core.datatypes import QuotaConfig, QuotaPeriod
from llama_stack.core.server.quota import QuotaMiddleware
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore import register_kvstore_backends
class InjectClientIDMiddleware(BaseHTTPMiddleware):
@ -29,8 +32,10 @@ class InjectClientIDMiddleware(BaseHTTPMiddleware):
def build_quota_config(db_path) -> QuotaConfig:
backend_name = f"kv_quota_{uuid4().hex}"
register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=str(db_path))})
return QuotaConfig(
kvstore=SqliteKVStoreConfig(db_path=str(db_path)),
kvstore=KVStoreReference(backend=backend_name, namespace="quota"),
anonymous_max_requests=1,
authenticated_max_requests=2,
period=QuotaPeriod.DAY,

View file

@ -12,15 +12,22 @@ from unittest.mock import AsyncMock, MagicMock
from pydantic import BaseModel, Field
from llama_stack.apis.inference import Inference
from llama_stack.core.datatypes import (
Api,
Provider,
StackRunConfig,
)
from llama_stack.core.datatypes import Api, Provider, StackRunConfig
from llama_stack.core.resolver import resolve_impls
from llama_stack.core.routers.inference import InferenceRouter
from llama_stack.core.routing_tables.models import ModelsRoutingTable
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageConfig,
)
from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec
from llama_stack.providers.utils.kvstore import register_kvstore_backends
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
def add_protocol_methods(cls: type, protocol: type[Protocol]) -> None:
@ -65,6 +72,35 @@ class SampleImpl:
pass
def make_run_config(**overrides) -> StackRunConfig:
storage = overrides.pop(
"storage",
StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(db_path=":memory:"),
"sql_default": SqliteSqlStoreConfig(db_path=":memory:"),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
conversations=SqlStoreReference(backend="sql_default", table_name="conversations"),
),
),
)
register_kvstore_backends({name: cfg for name, cfg in storage.backends.items() if cfg.type.value.startswith("kv_")})
register_sqlstore_backends(
{name: cfg for name, cfg in storage.backends.items() if cfg.type.value.startswith("sql_")}
)
defaults = dict(
image_name="test_image",
apis=[],
providers={},
storage=storage,
)
defaults.update(overrides)
return StackRunConfig(**defaults)
async def test_resolve_impls_basic():
# Create a real provider spec
provider_spec = InlineProviderSpec(
@ -78,7 +114,7 @@ async def test_resolve_impls_basic():
# Create provider registry with our provider
provider_registry = {Api.inference: {provider_spec.provider_type: provider_spec}}
run_config = StackRunConfig(
run_config = make_run_config(
image_name="test_image",
providers={
"inference": [

View file

@ -5,7 +5,6 @@
# the root directory of this source tree.
import time
from tempfile import TemporaryDirectory
import pytest
@ -16,8 +15,16 @@ from llama_stack.apis.inference import (
OpenAIUserMessageParam,
Order,
)
from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig
from llama_stack.providers.utils.inference.inference_store import InferenceStore
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
@pytest.fixture(autouse=True)
def setup_backends(tmp_path):
"""Register SQL store backends for testing."""
db_path = str(tmp_path / "test.db")
register_sqlstore_backends({"sql_default": SqliteSqlStoreConfig(db_path=db_path)})
def create_test_chat_completion(
@ -44,167 +51,162 @@ def create_test_chat_completion(
async def test_inference_store_pagination_basic():
"""Test basic pagination functionality."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
await store.initialize()
reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
store = InferenceStore(reference, policy=[])
await store.initialize()
# Create test data with different timestamps
base_time = int(time.time())
test_data = [
("zebra-task", base_time + 1),
("apple-job", base_time + 2),
("moon-work", base_time + 3),
("banana-run", base_time + 4),
("car-exec", base_time + 5),
]
# Create test data with different timestamps
base_time = int(time.time())
test_data = [
("zebra-task", base_time + 1),
("apple-job", base_time + 2),
("moon-work", base_time + 3),
("banana-run", base_time + 4),
("car-exec", base_time + 5),
]
# Store test chat completions
for completion_id, timestamp in test_data:
completion = create_test_chat_completion(completion_id, timestamp)
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
# Store test chat completions
for completion_id, timestamp in test_data:
completion = create_test_chat_completion(completion_id, timestamp)
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
# Wait for all queued writes to complete
await store.flush()
# Wait for all queued writes to complete
await store.flush()
# Test 1: First page with limit=2, descending order (default)
result = await store.list_chat_completions(limit=2, order=Order.desc)
assert len(result.data) == 2
assert result.data[0].id == "car-exec" # Most recent first
assert result.data[1].id == "banana-run"
assert result.has_more is True
assert result.last_id == "banana-run"
# Test 1: First page with limit=2, descending order (default)
result = await store.list_chat_completions(limit=2, order=Order.desc)
assert len(result.data) == 2
assert result.data[0].id == "car-exec" # Most recent first
assert result.data[1].id == "banana-run"
assert result.has_more is True
assert result.last_id == "banana-run"
# Test 2: Second page using 'after' parameter
result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc)
assert len(result2.data) == 2
assert result2.data[0].id == "moon-work"
assert result2.data[1].id == "apple-job"
assert result2.has_more is True
# Test 2: Second page using 'after' parameter
result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc)
assert len(result2.data) == 2
assert result2.data[0].id == "moon-work"
assert result2.data[1].id == "apple-job"
assert result2.has_more is True
# Test 3: Final page
result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc)
assert len(result3.data) == 1
assert result3.data[0].id == "zebra-task"
assert result3.has_more is False
# Test 3: Final page
result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc)
assert len(result3.data) == 1
assert result3.data[0].id == "zebra-task"
assert result3.has_more is False
async def test_inference_store_pagination_ascending():
"""Test pagination with ascending order."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
await store.initialize()
reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
store = InferenceStore(reference, policy=[])
await store.initialize()
# Create test data
base_time = int(time.time())
test_data = [
("delta-item", base_time + 1),
("charlie-task", base_time + 2),
("alpha-work", base_time + 3),
]
# Create test data
base_time = int(time.time())
test_data = [
("delta-item", base_time + 1),
("charlie-task", base_time + 2),
("alpha-work", base_time + 3),
]
# Store test chat completions
for completion_id, timestamp in test_data:
completion = create_test_chat_completion(completion_id, timestamp)
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
# Store test chat completions
for completion_id, timestamp in test_data:
completion = create_test_chat_completion(completion_id, timestamp)
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
# Wait for all queued writes to complete
await store.flush()
# Wait for all queued writes to complete
await store.flush()
# Test ascending order pagination
result = await store.list_chat_completions(limit=1, order=Order.asc)
assert len(result.data) == 1
assert result.data[0].id == "delta-item" # Oldest first
assert result.has_more is True
# Test ascending order pagination
result = await store.list_chat_completions(limit=1, order=Order.asc)
assert len(result.data) == 1
assert result.data[0].id == "delta-item" # Oldest first
assert result.has_more is True
# Second page with ascending order
result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc)
assert len(result2.data) == 1
assert result2.data[0].id == "charlie-task"
assert result2.has_more is True
# Second page with ascending order
result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc)
assert len(result2.data) == 1
assert result2.data[0].id == "charlie-task"
assert result2.has_more is True
async def test_inference_store_pagination_with_model_filter():
"""Test pagination combined with model filtering."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
await store.initialize()
reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
store = InferenceStore(reference, policy=[])
await store.initialize()
# Create test data with different models
base_time = int(time.time())
test_data = [
("xyz-task", base_time + 1, "model-a"),
("def-work", base_time + 2, "model-b"),
("pqr-job", base_time + 3, "model-a"),
("abc-run", base_time + 4, "model-b"),
]
# Create test data with different models
base_time = int(time.time())
test_data = [
("xyz-task", base_time + 1, "model-a"),
("def-work", base_time + 2, "model-b"),
("pqr-job", base_time + 3, "model-a"),
("abc-run", base_time + 4, "model-b"),
]
# Store test chat completions
for completion_id, timestamp, model in test_data:
completion = create_test_chat_completion(completion_id, timestamp, model)
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
# Store test chat completions
for completion_id, timestamp, model in test_data:
completion = create_test_chat_completion(completion_id, timestamp, model)
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
# Wait for all queued writes to complete
await store.flush()
# Wait for all queued writes to complete
await store.flush()
# Test pagination with model filter
result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc)
assert len(result.data) == 1
assert result.data[0].id == "pqr-job" # Most recent model-a
assert result.data[0].model == "model-a"
assert result.has_more is True
# Test pagination with model filter
result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc)
assert len(result.data) == 1
assert result.data[0].id == "pqr-job" # Most recent model-a
assert result.data[0].model == "model-a"
assert result.has_more is True
# Second page with model filter
result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc)
assert len(result2.data) == 1
assert result2.data[0].id == "xyz-task"
assert result2.data[0].model == "model-a"
assert result2.has_more is False
# Second page with model filter
result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc)
assert len(result2.data) == 1
assert result2.data[0].id == "xyz-task"
assert result2.data[0].model == "model-a"
assert result2.has_more is False
async def test_inference_store_pagination_invalid_after():
"""Test error handling for invalid 'after' parameter."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
await store.initialize()
reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
store = InferenceStore(reference, policy=[])
await store.initialize()
# Try to paginate with non-existent ID
with pytest.raises(ValueError, match="Record with id='non-existent' not found in table 'chat_completions'"):
await store.list_chat_completions(after="non-existent", limit=2)
# Try to paginate with non-existent ID
with pytest.raises(ValueError, match="Record with id='non-existent' not found in table 'chat_completions'"):
await store.list_chat_completions(after="non-existent", limit=2)
async def test_inference_store_pagination_no_limit():
"""Test pagination behavior when no limit is specified."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
await store.initialize()
reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
store = InferenceStore(reference, policy=[])
await store.initialize()
# Create test data
base_time = int(time.time())
test_data = [
("omega-first", base_time + 1),
("beta-second", base_time + 2),
]
# Create test data
base_time = int(time.time())
test_data = [
("omega-first", base_time + 1),
("beta-second", base_time + 2),
]
# Store test chat completions
for completion_id, timestamp in test_data:
completion = create_test_chat_completion(completion_id, timestamp)
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
# Store test chat completions
for completion_id, timestamp in test_data:
completion = create_test_chat_completion(completion_id, timestamp)
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
# Wait for all queued writes to complete
await store.flush()
# Wait for all queued writes to complete
await store.flush()
# Test without limit
result = await store.list_chat_completions(order=Order.desc)
assert len(result.data) == 2
assert result.data[0].id == "beta-second" # Most recent first
assert result.data[1].id == "omega-first"
assert result.has_more is False
# Test without limit
result = await store.list_chat_completions(order=Order.desc)
assert len(result.data) == 2
assert result.data[0].id == "beta-second" # Most recent first
assert result.data[1].id == "omega-first"
assert result.has_more is False
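
The pagination surface exercised above (each page reports `has_more` and `last_id`) composes into a simple drain loop. A hedged sketch; the helper name is illustrative, and `after` is only passed once a cursor exists, matching the call shapes in the tests:

```python
from llama_stack.apis.inference import Order

async def iter_all_chat_completions(store, page_size: int = 2):
    after = None
    while True:
        kwargs = {"limit": page_size, "order": Order.desc}
        if after is not None:
            kwargs["after"] = after
        page = await store.list_chat_completions(**kwargs)
        for completion in page.data:
            yield completion
        if not page.has_more:
            break
        after = page.last_id
```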

View file

@ -6,6 +6,7 @@
import time
from tempfile import TemporaryDirectory
from uuid import uuid4
import pytest
@ -15,8 +16,18 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseObject,
)
from llama_stack.apis.inference import OpenAIMessageParam, OpenAIUserMessageParam
from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
def build_store(db_path: str, policy: list | None = None) -> ResponsesStore:
backend_name = f"sql_responses_{uuid4().hex}"
register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path)})
return ResponsesStore(
ResponsesStoreReference(backend=backend_name, table_name="responses"),
policy=policy or [],
)
def create_test_response_object(
@ -54,7 +65,7 @@ async def test_responses_store_pagination_basic():
"""Test basic pagination functionality for responses store."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
store = build_store(db_path)
await store.initialize()
# Create test data with different timestamps
@ -103,7 +114,7 @@ async def test_responses_store_pagination_ascending():
"""Test pagination with ascending order."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
store = build_store(db_path)
await store.initialize()
# Create test data
@ -141,7 +152,7 @@ async def test_responses_store_pagination_with_model_filter():
"""Test pagination combined with model filtering."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
store = build_store(db_path)
await store.initialize()
# Create test data with different models
@ -182,7 +193,7 @@ async def test_responses_store_pagination_invalid_after():
"""Test error handling for invalid 'after' parameter."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
store = build_store(db_path)
await store.initialize()
# Try to paginate with non-existent ID
@ -194,7 +205,7 @@ async def test_responses_store_pagination_no_limit():
"""Test pagination behavior when no limit is specified."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
store = build_store(db_path)
await store.initialize()
# Create test data
@ -226,7 +237,7 @@ async def test_responses_store_get_response_object():
"""Test retrieving a single response object."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
store = build_store(db_path)
await store.initialize()
# Store a test response
@ -254,7 +265,7 @@ async def test_responses_store_input_items_pagination():
"""Test pagination functionality for input items."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
store = build_store(db_path)
await store.initialize()
# Store a test response with many inputs with explicit IDs
@ -335,7 +346,7 @@ async def test_responses_store_input_items_before_pagination():
"""Test before pagination functionality for input items."""
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/test.db"
store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
store = build_store(db_path)
await store.initialize()
# Store a test response with many inputs with explicit IDs