From add64e8e2aa0c927f321ab027c22e79e59e958c2 Mon Sep 17 00:00:00 2001 From: Shabana Baig <43451943+s-akhtar-baig@users.noreply.github.com> Date: Mon, 20 Oct 2025 16:10:37 -0400 Subject: [PATCH 1/3] feat: Add instructions parameter in response object (#3741) # Problem The current inline provider appends the user-provided instructions to messages as a system prompt, but the returned response object does not contain the instructions field (as specified in the OpenAI Responses spec). # What does this PR do? This pull request adds the instructions field to the response object definition and updates the inline provider. It also ensures that instructions from a previous response are not carried over to the next response (as specified in the OpenAI spec). Closes #[3566](https://github.com/llamastack/llama-stack/issues/3566) ## Test Plan - Tested manually that the model response changes according to the supplied instructions field. - Added a unit test to check that instructions from a previous response are not carried over to the next response. - Added integration tests to check the instructions parameter in the returned response object. - Added new recordings for the integration tests.
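A minimal sketch of the intended behavior, for reviewers (assuming a locally running Llama Stack server behind an OpenAI-compatible client; the base URL, API key, and follow-up prompt below are illustrative, not part of this patch — the model and instructions mirror the integration-test recordings):

```python
from openai import OpenAI

# Assumed local Llama Stack endpoint; adjust base_url/api_key for your setup.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

instructions = "You are a helpful assistant and speak in pirate language."
response = client.responses.create(
    model="ollama/llama3.2:3b-instruct-fp16",
    instructions=instructions,
    input="What is the capital of France?",
)
# The returned response object now echoes the instructions it was created with.
assert response.instructions == instructions

# Instructions are not inherited when chaining via previous_response_id,
# matching the OpenAI spec: no instructions supplied here, so none come back.
follow_up = client.responses.create(
    model="ollama/llama3.2:3b-instruct-fp16",
    input="And what is its population?",
    previous_response_id=response.id,
)
assert follow_up.instructions is None
```

--------- Co-authored-by: github-actions[bot] --- docs/static/deprecated-llama-stack-spec.html | 8 + docs/static/deprecated-llama-stack-spec.yaml | 8 + docs/static/llama-stack-spec.html | 8 + docs/static/llama-stack-spec.yaml | 8 + docs/static/stainless-llama-stack-spec.html | 8 + docs/static/stainless-llama-stack-spec.yaml | 8 + llama_stack/apis/agents/openai_responses.py | 2 + .../responses/openai_responses.py | 1 + .../meta_reference/responses/streaming.py | 4 + ...e62998c6882727519858bbd5954307d10a673.json | 3 +- ...f54c271f879db8b5a6ce62848b86a43bc49e4.json | 447 ++ ...aa3de23d22b30f353c8ed7e6cfd033d904e04.json | 888 +++ ...6bb54955fe0b10f5c4102b78e2d428b5ffc7a.json | 3 +- ...2f8402f7c91d15e2240f855cc9b8b4e25352a.json | 256 + ...dee0b99fa1e0b27934de1e6c5d29c03026626.json | 3 +- ...8c34fbb3d0af4cf4307d4363ff570c260287b.json | 3494 ++++++--- ...5b4df3aadc1637a93358a85c5ec2de8338332.json | 442 ++ ...6753617b60a8c33ece637db18061d23086536.json | 6 +- ...f1b1d0e0bd618975cbf4752eb31ada6d2482b.json | 416 ++ ...3f2fbc9d626af08314bd7f5ba69d038ea7c1b.json | 3 +- ...7c5db69585e66f4fde18eaa8bfd4bb4e3d783.json | 5 +- ...af8fb6bbab4f37691fadc08812ce223dfc628.json | 3 +- ...1c594644b2a1387ac3cee7cd434df25e8f22f.json | 442 ++ ...99afa0ff16a609aaa941737e99606961a6a07.json | 1202 ++- ...36b34c42f68bf04b1b2cb74ddf00943c0442d.json | 6 +- ...1daa94c1287acf164cd81ddd51843d05be718.json | 2 +- ...2ae9dca423726834aec8b38420dccb735c050.json | 5 +- ...7d446d91e9837add7e9f4de236627195d41e4.json | 6624 +++++------------ ...47259897598e28037fe5f7c09f6677edd08e9.json | 3 +- ...b487c7128fc28534351deb4662fba31043fa4.json | 3 +- ...41773965dd66b569506b5622b1a797c45f8e4.json | 3 +- ...e9973e2a938cab3db3e1be017bbe8be10edc6.json | 5 +- ...24849cb763c7bb66acf3937b524a539b80366.json | 59 + ...5ae2632ecf543ee440e7d87ea16f8e83461a5.json | 3 +- ...a5809d6fb56f8c9f92d93030f57cba51a1fe2.json | 3 +- ...81efbada34e06f59ddf536149871c64c9a247.json | 442 ++ ...c137813e8db50d0d46395ef9ba98636fa5819.json | 3 +- ...c5e7fc9d41fdaa3eb357f518e0fcaec5ea1e2.json | 59 + ...af51636b480db9cc520614ee4886418776237.json | 3 +- ...5494b8fe0ff707261108305353e4ad980195f.json | 3 +- ...11d4aa54882a124d783a8096fd88adf481065.json | 3 +- ...120a4704dde82acf5ae198982fd62bd103279.json | 3 +- ...7851f102c12946164a563584e6316bd1b6228.json | 3 +- ...9a8eddb2c2aaf826b513fec55dcd70cdf35ea.json | 260 +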
...aee0f274fc53b263c25fe5e9e4bc23739f3db.json | 442 ++ ...f2cc9e9481ffae9cff5693b2f669270c9c0a7.json | 5 +- ...5e979c7a298fdbeedec153954ce817da7e3e7.json | 3 +- ...8170e073e49e478658a4b098b3581a703e843.json | 3 +- ...8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json | 3 +- ...28b644e76999ebb2fe8f09bead3dee56a6046.json | 59 + ...d4016aeeaf2bbeeaa5643d9620f5ea484430e.json | 5 +- ...256f81c43a906a0a56684ca97e848f8d6a94c.json | 3 +- ...3a3cee734d69beb7cd6d13a3d3c2c64eca734.json | 5 +- .../agents/test_openai_responses.py | 50 + ...34a95f56931b792d5939f4cebc57-abd54ea0.json | 44 + .../meta_reference/test_openai_responses.py | 63 + 56 files changed, 10032 insertions(+), 5816 deletions(-) create mode 100644 tests/integration/agents/recordings/0940d1521204120ff9687b8ad6bf54c271f879db8b5a6ce62848b86a43bc49e4.json create mode 100644 tests/integration/agents/recordings/0f5443c07d1568fd139b8f3ea0aaa3de23d22b30f353c8ed7e6cfd033d904e04.json create mode 100644 tests/integration/agents/recordings/15b23045b5cdfc49228d58e4a082f8402f7c91d15e2240f855cc9b8b4e25352a.json create mode 100644 tests/integration/agents/recordings/1f0aef7475448c77021b4e321125b4df3aadc1637a93358a85c5ec2de8338332.json create mode 100644 tests/integration/agents/recordings/256d8571909664fc6c925058b2ff1b1d0e0bd618975cbf4752eb31ada6d2482b.json create mode 100644 tests/integration/agents/recordings/45d0aabc502385b4cc23e16706a1c594644b2a1387ac3cee7cd434df25e8f22f.json create mode 100644 tests/integration/agents/recordings/7e794c73bf79604a10482bba03124849cb763c7bb66acf3937b524a539b80366.json create mode 100644 tests/integration/agents/recordings/8c4ec47152697a5b34e44d75af581efbada34e06f59ddf536149871c64c9a247.json create mode 100644 tests/integration/agents/recordings/8fc418c02b8b6fe09238e36fb72c5e7fc9d41fdaa3eb357f518e0fcaec5ea1e2.json create mode 100644 tests/integration/agents/recordings/b3c24a0ab429fb3d7e3680a2a689a8eddb2c2aaf826b513fec55dcd70cdf35ea.json create mode 100644 tests/integration/agents/recordings/b4a47451a2af579b9dfb4a60bacaee0f274fc53b263c25fe5e9e4bc23739f3db.json create mode 100644 tests/integration/agents/recordings/da6fc54bb65dd1f83e577109b8228b644e76999ebb2fe8f09bead3dee56a6046.json create mode 100644 tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-abd54ea0.json diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 60a8b9fbd..98ed50c4f 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -9024,6 +9024,10 @@ "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" + }, "input": { "type": "array", "items": { @@ -9901,6 +9905,10 @@ "usage": { "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" + }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" } }, "additionalProperties": false, diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index aaa6cd413..99c8dd03e 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -6734,6 +6734,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage 
information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context input: type: array items: @@ -7403,6 +7407,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context additionalProperties: false required: - created_at diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 413e4f23e..1091a1cb6 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -7600,6 +7600,10 @@ "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" + }, "input": { "type": "array", "items": { @@ -8148,6 +8152,10 @@ "usage": { "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" + }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" } }, "additionalProperties": false, diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 93e51de6a..6c3702374 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -5815,6 +5815,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context input: type: array items: @@ -6218,6 +6222,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context additionalProperties: false required: - created_at diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 858f20725..ee0a265d3 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -9272,6 +9272,10 @@ "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" + }, "input": { "type": "array", "items": { @@ -9820,6 +9824,10 @@ "usage": { "$ref": "#/components/schemas/OpenAIResponseUsage", "description": "(Optional) Token usage information for the response" + }, + "instructions": { + "type": "string", + "description": "(Optional) System message inserted into the model's context" } }, "additionalProperties": false, diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 886549dbc..eff01931f 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -7028,6 +7028,10 @@ components: $ref: '#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context input: type: array items: @@ -7431,6 +7435,10 @@ components: $ref: 
'#/components/schemas/OpenAIResponseUsage' description: >- (Optional) Token usage information for the response + instructions: + type: string + description: >- + (Optional) System message inserted into the model's context additionalProperties: false required: - created_at diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index 25dc89a6b..821d6a8af 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -545,6 +545,7 @@ class OpenAIResponseObject(BaseModel): :param tools: (Optional) An array of tools the model may call while generating a response. :param truncation: (Optional) Truncation strategy applied to the response :param usage: (Optional) Token usage information for the response + :param instructions: (Optional) System message inserted into the model's context """ created_at: int @@ -564,6 +565,7 @@ class OpenAIResponseObject(BaseModel): tools: list[OpenAIResponseTool] | None = None truncation: str | None = None usage: OpenAIResponseUsage | None = None + instructions: str | None = None @json_schema_type diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 851e6ef28..2360dafd9 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -359,6 +359,7 @@ class OpenAIResponsesImpl: tool_executor=self.tool_executor, safety_api=self.safety_api, guardrail_ids=guardrail_ids, + instructions=instructions, ) # Stream the response diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index caf899cdd..e80ffcdd1 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -110,6 +110,7 @@ class StreamingResponseOrchestrator: text: OpenAIResponseText, max_infer_iters: int, tool_executor, # Will be the tool execution logic from the main class + instructions: str, safety_api, guardrail_ids: list[str] | None = None, ): @@ -133,6 +134,8 @@ class StreamingResponseOrchestrator: self.accumulated_usage: OpenAIResponseUsage | None = None # Track if we've sent a refusal response self.violation_detected = False + # system message that is inserted into the model's context + self.instructions = instructions async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream: """Create a refusal response to replace streaming content.""" @@ -176,6 +179,7 @@ class StreamingResponseOrchestrator: tools=self.ctx.available_tools(), error=error, usage=self.accumulated_usage, + instructions=self.instructions, ) async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]: diff --git a/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json b/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json index 4d4331740..067b7d254 100644 --- a/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json +++ b/tests/integration/agents/recordings/00f8a71ccb939737ed72a289eede62998c6882727519858bbd5954307d10a673.json @@ -548,5 +548,6 @@ } ], "is_streaming": true - } + }, + 
"id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/0940d1521204120ff9687b8ad6bf54c271f879db8b5a6ce62848b86a43bc49e4.json b/tests/integration/agents/recordings/0940d1521204120ff9687b8ad6bf54c271f879db8b5a6ce62848b86a43bc49e4.json new file mode 100644 index 000000000..aa61b7dbe --- /dev/null +++ b/tests/integration/agents/recordings/0940d1521204120ff9687b8ad6bf54c271f879db8b5a6ce62848b86a43bc49e4.json @@ -0,0 +1,447 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_get_boiling_point[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_5qverjg6", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_5qverjg6", + "content": "-100" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": { + "type": "function", + "function": { + "name": "get_boiling_point" + } + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": 
"fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " liquid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0940d1521204", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/0f5443c07d1568fd139b8f3ea0aaa3de23d22b30f353c8ed7e6cfd033d904e04.json b/tests/integration/agents/recordings/0f5443c07d1568fd139b8f3ea0aaa3de23d22b30f353c8ed7e6cfd033d904e04.json new file mode 100644 index 000000000..3cf297c34 --- /dev/null +++ b/tests/integration/agents/recordings/0f5443c07d1568fd139b8f3ea0aaa3de23d22b30f353c8ed7e6cfd033d904e04.json @@ -0,0 +1,888 @@ +{ + "test_id": "tests/integration/agents/test_openai_responses.py::test_response_with_instructions[txt=ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant and speak in pirate language." + }, + { + "role": "user", + "content": "What is the capital of France?" + }, + { + "role": "assistant", + "content": "The capital of France is Paris." 
+ } + ], + "stream": true, + "stream_options": { + "include_usage": true + } + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " Yer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " look", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " fer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " port", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null 
+ }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " o", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "'", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " call", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " eh", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " That", + "function_call": null, + 
"refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " be", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " one", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "!", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " Yer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " won", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { 
+ "delta": { + "content": "'t", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " go", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " astr", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "ay", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " answer", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": " mate", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "y", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-0f5443c07d15", + "choices": [], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 32, + "prompt_tokens": 50, + "total_tokens": 82, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json b/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json index d606edb37..7efea91ba 100644 --- a/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json +++ 
b/tests/integration/agents/recordings/13fac3724cd626a119153f60fa56bb54955fe0b10f5c4102b78e2d428b5ffc7a.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/15b23045b5cdfc49228d58e4a082f8402f7c91d15e2240f855cc9b8b4e25352a.json b/tests/integration/agents/recordings/15b23045b5cdfc49228d58e4a082f8402f7c91d15e2240f855cc9b8b4e25352a.json new file mode 100644 index 000000000..b899e0c2d --- /dev/null +++ b/tests/integration/agents/recordings/15b23045b5cdfc49228d58e4a082f8402f7c91d15e2240f855cc9b8b4e25352a.json @@ -0,0 +1,256 @@ +{ + "test_id": "tests/integration/agents/test_openai_responses.py::test_response_with_instructions[txt=ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + } + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " capital", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " France", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": 
null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": " Paris", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-15b23045b5cd", + "choices": [], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 8, + "prompt_tokens": 32, + "total_tokens": 40, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json b/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json index b8b22f51d..407ac0655 100644 --- a/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json +++ b/tests/integration/agents/recordings/1a0d3109cf92111ed4cb061a857dee0b99fa1e0b27934de1e6c5d29c03026626.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/1adb6f4621eaa9e5d350925c3fc8c34fbb3d0af4cf4307d4363ff570c260287b.json b/tests/integration/agents/recordings/1adb6f4621eaa9e5d350925c3fc8c34fbb3d0af4cf4307d4363ff570c260287b.json index 4d7a1d1e4..241fb6127 100644 --- a/tests/integration/agents/recordings/1adb6f4621eaa9e5d350925c3fc8c34fbb3d0af4cf4307d4363ff570c260287b.json +++ b/tests/integration/agents/recordings/1adb6f4621eaa9e5d350925c3fc8c34fbb3d0af4cf4307d4363ff570c260287b.json @@ -55,7 +55,7 @@ "choices": [ { "delta": { - "content": "'m", + "content": "'d", "function_call": null, "refusal": null, "role": "assistant", @@ -81,7 +81,7 @@ "choices": [ { "delta": { - 
"content": " not", + "content": " be", "function_call": null, "refusal": null, "role": "assistant", @@ -107,7 +107,7 @@ "choices": [ { "delta": { - "content": " able", + "content": " happy", "function_call": null, "refusal": null, "role": "assistant", @@ -159,7 +159,7 @@ "choices": [ { "delta": { - "content": " provide", + "content": " help", "function_call": null, "refusal": null, "role": "assistant", @@ -185,7 +185,7 @@ "choices": [ { "delta": { - "content": " real", + "content": " you", "function_call": null, "refusal": null, "role": "assistant", @@ -211,7 +211,59 @@ "choices": [ { "delta": { - "content": "-time", + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " current", "function_call": null, "refusal": null, "role": "assistant", @@ -282,6 +334,58 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -393,189 +497,7 @@ "choices": [ { "delta": { - "content": " can", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " tell", - "function_call": null, - "refusal": null, - "role": 
"assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " you", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " that", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " Tokyo", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " Japan", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " has", + "content": "'m", "function_call": null, "refusal": null, "role": "assistant", @@ -627,7 +549,7 @@ "choices": [ { "delta": { - "content": " humid", + "content": " large", "function_call": null, "refusal": null, "role": "assistant", @@ -653,7 +575,7 @@ "choices": [ { "delta": { - "content": " subt", + "content": " language", "function_call": null, "refusal": null, "role": "assistant", @@ -679,7 +601,7 @@ "choices": [ { "delta": { - "content": "ropical", + "content": " model", "function_call": null, 
"refusal": null, "role": "assistant", @@ -705,7 +627,7 @@ "choices": [ { "delta": { - "content": " climate", + "content": ",", "function_call": null, "refusal": null, "role": "assistant", @@ -731,7 +653,7 @@ "choices": [ { "delta": { - "content": " with", + "content": " I", "function_call": null, "refusal": null, "role": "assistant", @@ -757,7 +679,7 @@ "choices": [ { "delta": { - "content": " hot", + "content": " don", "function_call": null, "refusal": null, "role": "assistant", @@ -783,7 +705,7 @@ "choices": [ { "delta": { - "content": " summers", + "content": "'t", "function_call": null, "refusal": null, "role": "assistant", @@ -809,7 +731,7 @@ "choices": [ { "delta": { - "content": " and", + "content": " have", "function_call": null, "refusal": null, "role": "assistant", @@ -835,7 +757,7 @@ "choices": [ { "delta": { - "content": " cold", + "content": " real", "function_call": null, "refusal": null, "role": "assistant", @@ -861,7 +783,7 @@ "choices": [ { "delta": { - "content": " winters", + "content": "-time", "function_call": null, "refusal": null, "role": "assistant", @@ -887,111 +809,7 @@ "choices": [ { "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "If", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " you", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "'d", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " like", + "content": " access", "function_call": null, "refusal": null, "role": "assistant", @@ -1043,7 +861,449 @@ "choices": [ { "delta": { - "content": " know", + "content": " current", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": 
null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " conditions", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "That", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " being", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " said", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + 
"tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " can", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " suggest", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " some", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " ways", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " you", 
+ "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " find", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " out", "function_call": null, "refusal": null, "role": "assistant", @@ -1147,59 +1407,7 @@ "choices": [ { "delta": { - "content": " or", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " forecast", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " for", + "content": " in", "function_call": null, "refusal": null, "role": "assistant", @@ -1251,7 +1459,7 @@ "choices": [ { "delta": { - "content": ",", + "content": ":\n\n", "function_call": null, "refusal": null, "role": "assistant", @@ -1277,7 +1485,7 @@ "choices": [ { "delta": { - "content": " I", + "content": "1", "function_call": null, "refusal": null, "role": "assistant", @@ -1303,7 +1511,7 @@ "choices": [ { "delta": { - "content": " recommend", + "content": ".", "function_call": null, "refusal": null, "role": "assistant", @@ -1329,59 +1537,7 @@ "choices": [ { "delta": { - "content": " checking", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": 
"chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " a", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " reliable", + "content": " Check", "function_call": null, "refusal": null, "role": "assistant", @@ -1433,7 +1589,7 @@ "choices": [ { "delta": { - "content": " source", + "content": " weather", "function_call": null, "refusal": null, "role": "assistant", @@ -1459,7 +1615,7 @@ "choices": [ { "delta": { - "content": " such", + "content": " websites", "function_call": null, "refusal": null, "role": "assistant", @@ -1485,7 +1641,7 @@ "choices": [ { "delta": { - "content": " as", + "content": ":", "function_call": null, "refusal": null, "role": "assistant", @@ -1511,7 +1667,7 @@ "choices": [ { "delta": { - "content": ":\n\n", + "content": " You", "function_call": null, "refusal": null, "role": "assistant", @@ -1537,7 +1693,7 @@ "choices": [ { "delta": { - "content": "*", + "content": " can", "function_call": null, "refusal": null, "role": "assistant", @@ -1563,7 +1719,267 @@ "choices": [ { "delta": { - "content": " The", + "content": " check", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " websites", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " like", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Acc", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "u", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".com", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " or", "function_call": null, "refusal": null, "role": "assistant", @@ -1797,7 +2213,397 @@ "choices": [ { "delta": { - "content": " website", + "content": " for", + 
"function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " current", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " condition", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " forecast", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { 
+ "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "2", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Use", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " mobile", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " app", "function_call": null, "refusal": null, "role": "assistant", @@ -1849,7 +2655,7 @@ "choices": [ { "delta": { - "content": " \n", + "content": " provide", "function_call": null, "refusal": null, "role": "assistant", @@ -2057,7 +2863,7 @@ "choices": [ { "delta": { - "content": "*", + "content": " real", "function_call": null, "refusal": null, "role": "assistant", @@ -2083,7 +2889,7 @@ "choices": [ { "delta": { - "content": " Acc", + "content": "-time", "function_call": null, "refusal": null, "role": "assistant", @@ -2109,7 +2915,7 @@ "choices": [ { "delta": { - "content": "u", + "content": " weather", "function_call": null, "refusal": null, "role": "assistant", @@ -2135,7 +2941,7 @@ "choices": [ { "delta": { - "content": "Weather", + "content": " information", "function_call": null, "refusal": null, "role": "assistant", @@ -2161,7 +2967,7 @@ "choices": [ { "delta": { - "content": ":", + "content": ",", "function_call": null, "refusal": null, "role": "assistant", @@ -2187,7 +2993,7 @@ "choices": [ { "delta": { - "content": " https", + "content": " such", "function_call": null, "refusal": null, "role": "assistant", @@ -2213,189 +3019,7 @@ "choices": [ { "delta": { - "content": "://", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "www", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": ".acc", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "u", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 
0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "weather", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": ".com", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "/\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "*", + "content": " as", "function_call": null, "refusal": null, "role": "assistant", @@ -2466,6 +3090,240 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Weather", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Underground", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + 
"service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "3", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Check", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " social", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " media", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -2499,7 +3357,7 @@ "choices": [ { "delta": { - "content": " https", + "content": " You", "function_call": null, "refusal": null, "role": "assistant", @@ -2525,7 +3383,7 @@ "choices": [ { "delta": { - "content": "://", + "content": " can", "function_call": null, "refusal": 
null, "role": "assistant", @@ -2551,7 +3409,7 @@ "choices": [ { "delta": { - "content": "dark", + "content": " also", "function_call": null, "refusal": null, "role": "assistant", @@ -2577,7 +3435,7 @@ "choices": [ { "delta": { - "content": "sky", + "content": " check", "function_call": null, "refusal": null, "role": "assistant", @@ -2603,7 +3461,7 @@ "choices": [ { "delta": { - "content": ".net", + "content": " social", "function_call": null, "refusal": null, "role": "assistant", @@ -2629,7 +3487,7 @@ "choices": [ { "delta": { - "content": "/\n\n", + "content": " media", "function_call": null, "refusal": null, "role": "assistant", @@ -2655,7 +3513,7 @@ "choices": [ { "delta": { - "content": "Please", + "content": " platforms", "function_call": null, "refusal": null, "role": "assistant", @@ -2681,7 +3539,215 @@ "choices": [ { "delta": { - "content": " keep", + "content": " like", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Twitter", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " or", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " Facebook", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " updates", + "function_call": null, + "refusal": null, + "role": 
"assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " on", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " weather", "function_call": null, "refusal": null, "role": "assistant", @@ -2733,7 +3799,85 @@ "choices": [ { "delta": { - "content": " mind", + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ".\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "Please", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " note", "function_call": null, "refusal": null, "role": "assistant", @@ -2778,6 +3922,578 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " my", + "function_call": null, + "refusal": null, + "role": 
"assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " knowledge", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " cutoff", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " December", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " ", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "202", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + 
"content": "3", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " so", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " may", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " not", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " have", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + 
"id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " most", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " up", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "-to", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": "-date", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " on", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1adb6f4621ea", + "choices": [ + { + "delta": { + "content": " current", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -2830,526 +4546,6 @@ "usage": null } }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " can", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " change", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " quickly", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " and", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " it", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null 
- } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": "'s", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " always", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " a", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " good", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " idea", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " check", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": 
null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " the", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " latest", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " forecast", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " before", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " planning", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " your", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-1adb6f4621ea", - "choices": [ - { - "delta": { - "content": " 
activities", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -3413,9 +4609,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 131, + "completion_tokens": 176, "prompt_tokens": 32, - "total_tokens": 163, + "total_tokens": 208, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/1f0aef7475448c77021b4e321125b4df3aadc1637a93358a85c5ec2de8338332.json b/tests/integration/agents/recordings/1f0aef7475448c77021b4e321125b4df3aadc1637a93358a85c5ec2de8338332.json new file mode 100644 index 000000000..4c0fa6cce --- /dev/null +++ b/tests/integration/agents/recordings/1f0aef7475448c77021b4e321125b4df3aadc1637a93358a85c5ec2de8338332.json @@ -0,0 +1,442 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_create_turn_response[ollama/llama3.2:3b-instruct-fp16-client_tools1]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "Call get_boiling_point_with_metadata tool and answer What is the boiling point of polyjuice?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_klhbln13", + "type": "function", + "function": { + "name": "get_boiling_point_with_metadata", + "arguments": "{\"celcius\":false,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_klhbln13", + "content": "-212" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point_with_metadata", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": 
null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": 
null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "212", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " degrees", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": " Celsius", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-1f0aef747544", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/2172059863d4d17e7525483102a6753617b60a8c33ece637db18061d23086536.json b/tests/integration/agents/recordings/2172059863d4d17e7525483102a6753617b60a8c33ece637db18061d23086536.json index 992648658..9f9397057 100644 --- a/tests/integration/agents/recordings/2172059863d4d17e7525483102a6753617b60a8c33ece637db18061d23086536.json +++ b/tests/integration/agents/recordings/2172059863d4d17e7525483102a6753617b60a8c33ece637db18061d23086536.json @@ -56,7 +56,7 @@ "tool_calls": [ { "index": 0, - "id": "call_os3xa9go", + "id": "call_6nqo069h", "function": { "arguments": "{\"city\":\"Tokyo\"}", "name": 
"get_weather" @@ -115,9 +115,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 15, + "completion_tokens": 18, "prompt_tokens": 179, - "total_tokens": 194, + "total_tokens": 197, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/256d8571909664fc6c925058b2ff1b1d0e0bd618975cbf4752eb31ada6d2482b.json b/tests/integration/agents/recordings/256d8571909664fc6c925058b2ff1b1d0e0bd618975cbf4752eb31ada6d2482b.json new file mode 100644 index 000000000..21d5a0663 --- /dev/null +++ b/tests/integration/agents/recordings/256d8571909664fc6c925058b2ff1b1d0e0bd618975cbf4752eb31ada6d2482b.json @@ -0,0 +1,416 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_custom_tool_infinite_loop[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant Always respond with tool calls no matter what. " + }, + { + "role": "user", + "content": "Get the boiling point of polyjuice with a tool call." + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_9x4z21g1", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":\"true\",\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_9x4z21g1", + "content": "-100" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": 
null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " Poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, 
+ "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-256d85719096", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json b/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json index a94c52c72..9a1781046 100644 --- a/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json +++ b/tests/integration/agents/recordings/292308724331c7172aaf91fe1373f2fbc9d626af08314bd7f5ba69d038ea7c1b.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json b/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json index 3699fbc8b..3a1f57ee8 100644 --- a/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json +++ b/tests/integration/agents/recordings/36e22908b34c0835037ba7b52477c5db69585e66f4fde18eaa8bfd4bb4e3d783.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_ixvkq8fh", + "id": "call_icfpgg5q", "function": { "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json b/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json index 4f001f5bf..0a27ddb7d 100644 --- 
a/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json +++ b/tests/integration/agents/recordings/45175e711385e24f62516c3982eaf8fb6bbab4f37691fadc08812ce223dfc628.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/45d0aabc502385b4cc23e16706a1c594644b2a1387ac3cee7cd434df25e8f22f.json b/tests/integration/agents/recordings/45d0aabc502385b4cc23e16706a1c594644b2a1387ac3cee7cd434df25e8f22f.json new file mode 100644 index 000000000..bfbbcb87b --- /dev/null +++ b/tests/integration/agents/recordings/45d0aabc502385b4cc23e16706a1c594644b2a1387ac3cee7cd434df25e8f22f.json @@ -0,0 +1,442 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_custom_tool[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_icfpgg5q", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_icfpgg5q", + "content": "-100" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + 
"index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " liquid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": 
"assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-45d0aabc5023", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/4da32cdf48ae4c3b381e3557edb99afa0ff16a609aaa941737e99606961a6a07.json b/tests/integration/agents/recordings/4da32cdf48ae4c3b381e3557edb99afa0ff16a609aaa941737e99606961a6a07.json index 89fa490c3..755276918 100644 --- a/tests/integration/agents/recordings/4da32cdf48ae4c3b381e3557edb99afa0ff16a609aaa941737e99606961a6a07.json +++ b/tests/integration/agents/recordings/4da32cdf48ae4c3b381e3557edb99afa0ff16a609aaa941737e99606961a6a07.json @@ -45,7 +45,33 @@ "choices": [ { "delta": { - "content": "The", + "content": "Italy", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": "'s", "function_call": null, "refusal": null, "role": 
"assistant", @@ -90,58 +116,6 @@ "usage": null } }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-4da32cdf48ae", - "choices": [ - { - "delta": { - "content": " of", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-4da32cdf48ae", - "choices": [ - { - "delta": { - "content": " Italy", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -194,6 +168,1124 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " also", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " seat", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": 
null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " EU", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": ",", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " as", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " well", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " it", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " has", + "function_call": 
null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " been", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " centuries", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " significant", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " role", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " international", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " politics", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " being", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " also", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " an", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " important", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " location", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " various", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " historical", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": 
null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " events", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " such", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " like", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " signing", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " treaty", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": " West", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": "ph", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-4da32cdf48ae", + "choices": [ + { + "delta": { + "content": "alia", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -257,9 +1349,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 8, + "completion_tokens": 50, "prompt_tokens": 82, - "total_tokens": 90, + "total_tokens": 132, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/585a2cf2c22b0db155a6a94052836b34c42f68bf04b1b2cb74ddf00943c0442d.json b/tests/integration/agents/recordings/585a2cf2c22b0db155a6a94052836b34c42f68bf04b1b2cb74ddf00943c0442d.json index cac9a6db2..988b270d7 100644 --- a/tests/integration/agents/recordings/585a2cf2c22b0db155a6a94052836b34c42f68bf04b1b2cb74ddf00943c0442d.json +++ b/tests/integration/agents/recordings/585a2cf2c22b0db155a6a94052836b34c42f68bf04b1b2cb74ddf00943c0442d.json @@ -48,7 +48,7 @@ "tool_calls": [ { "index": 0, - "id": "call_lqrdy0rt", + "id": "call_x427af31", "function": { "arguments": "{}", "name": "get_current_time" @@ -107,9 +107,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 14, + "completion_tokens": 12, "prompt_tokens": 161, - "total_tokens": 175, + "total_tokens": 173, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git 
a/tests/integration/agents/recordings/5edf2f0b7a9c875e80e4719f71a1daa94c1287acf164cd81ddd51843d05be718.json b/tests/integration/agents/recordings/5edf2f0b7a9c875e80e4719f71a1daa94c1287acf164cd81ddd51843d05be718.json index 49ca098d5..009646e27 100644 --- a/tests/integration/agents/recordings/5edf2f0b7a9c875e80e4719f71a1daa94c1287acf164cd81ddd51843d05be718.json +++ b/tests/integration/agents/recordings/5edf2f0b7a9c875e80e4719f71a1daa94c1287acf164cd81ddd51843d05be718.json @@ -56,7 +56,7 @@ "tool_calls": [ { "index": 0, - "id": "call_4ibtjudr", + "id": "call_wkjhgmpf", "function": { "arguments": "{\"city\":\"Tokyo\"}", "name": "get_weather" diff --git a/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json b/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json index 298e0e3b8..8b8f04ae6 100644 --- a/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json +++ b/tests/integration/agents/recordings/697a25dd7f0ff515f567c883ad72ae9dca423726834aec8b38420dccb735c050.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_pojpzwm8", + "id": "call_klhbln13", "function": { "arguments": "{\"celcius\":false,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point_with_metadata" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/6b207540bc67e2b9e6beb091d477d446d91e9837add7e9f4de236627195d41e4.json b/tests/integration/agents/recordings/6b207540bc67e2b9e6beb091d477d446d91e9837add7e9f4de236627195d41e4.json index fc263d5e9..d5d249587 100644 --- a/tests/integration/agents/recordings/6b207540bc67e2b9e6beb091d477d446d91e9837add7e9f4de236627195d41e4.json +++ b/tests/integration/agents/recordings/6b207540bc67e2b9e6beb091d477d446d91e9837add7e9f4de236627195d41e4.json @@ -81,33 +81,7 @@ "choices": [ { "delta": { - "content": " not", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " able", + "content": " happy", "function_call": null, "refusal": null, "role": "assistant", @@ -159,267 +133,7 @@ "choices": [ { "delta": { - "content": " provide", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " real", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "-time", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " weather", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " information", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " However", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " I", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - 
"system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " can", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " give", + "content": " help", "function_call": null, "refusal": null, "role": "assistant", @@ -471,7 +185,7 @@ "choices": [ { "delta": { - "content": " an", + "content": " with", "function_call": null, "refusal": null, "role": "assistant", @@ -497,7 +211,7 @@ "choices": [ { "delta": { - "content": " idea", + "content": " your", "function_call": null, "refusal": null, "role": "assistant", @@ -523,553 +237,7 @@ "choices": [ { "delta": { - "content": " of", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " what", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Tokyo", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "'s", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " typical", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": 
null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " weather", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " is", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " like", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " during", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " different", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " seasons", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": 
"llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Spring", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "March", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " May", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ")**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - 
"index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ":", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Mild", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " temperatures", + "content": " question", "function_call": null, "refusal": null, "role": "assistant", @@ -1121,7 +289,7 @@ "choices": [ { "delta": { - "content": " usually", + "content": " but", "function_call": null, "refusal": null, "role": "assistant", @@ -1147,7 +315,85 @@ "choices": [ { "delta": { - "content": " ranging", + "content": " I", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " need", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " more", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " information", "function_call": null, "refusal": null, "role": "assistant", @@ -1199,7 +445,7 @@ "choices": [ { "delta": { - "content": " ", + "content": " you", "function_call": null, "refusal": null, "role": "assistant", @@ -1225,7 
+471,7 @@ "choices": [ { "delta": { - "content": "10", + "content": ".", "function_call": null, "refusal": null, "role": "assistant", @@ -1251,3725 +497,7 @@ "choices": [ { "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "20", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "50", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": 
"chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "68", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ").", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " It", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - 
"created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "'s", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " a", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " great", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " time", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " visit", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Tokyo", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - 
"finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " for", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " cherry", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " blossom", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " season", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Summer", - "function_call": 
null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "June", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " August", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ")**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ":", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - 
"choices": [ - { - "delta": { - "content": " Hot", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " and", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " humid", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " with", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " temperatures", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " often", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " exceeding", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "30", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "86", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - 
"system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ").", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Summer", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " is", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " rainy", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " with", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " heavy", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": 
"llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " down", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "p", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "ours", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " during", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " the", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " after", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "no", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - 
"index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "ons", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Aut", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "umn", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "September", - "function_call": null, - "refusal": null, - "role": 
"assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " November", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ")**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ":", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Comfort", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "able", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - 
"content": " temperatures", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ranging", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " from", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "10", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - 
"__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "20", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "50", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "68", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ").", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Autumn", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " foliage", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - 
"system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " is", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " a", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " highlight", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " of", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Tokyo", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "'s", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " scenery", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": 
"llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Winter", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "December", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " to", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " February", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": 
null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ")**", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ":", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Cold", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " and", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " snowy", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ",", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " with", - "function_call": null, - "refusal": null, - 
"role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " temperatures", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " sometimes", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " dropping", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " below", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " ", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "0", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { 
- "delta": { - "content": "\u00b0C", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " (", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "32", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "\u00b0F", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ").", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " Snow", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "fall", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": 
"openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " can", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " be", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " significant", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " in", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " some", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " parts", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " of", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - 
"system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " the", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " city", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": ".\n\n", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": "Please", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " note", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " that", - "function_call": null, - "refusal": null, - "role": "assistant", - "tool_calls": null - }, - "finish_reason": null, - "index": 0, - "logprobs": null - } - ], - "created": 0, - "model": "llama3.2:3b-instruct-fp16", - "object": "chat.completion.chunk", - "service_tier": null, - "system_fingerprint": "fp_ollama", - "usage": null - } - }, - { - "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", - "__data__": { - "id": "rec-6b207540bc67", - "choices": [ - { - "delta": { - "content": " these", + "content": " There", "function_call": null, "refusal": null, "role": "assistant", @@ -5021,7 +549,7 @@ "choices": [ { "delta": { - "content": " general", + "content": " many", "function_call": null, 
"refusal": null, "role": "assistant", @@ -5047,7 +575,7 @@ "choices": [ { "delta": { - "content": " temperature", + "content": " cities", "function_call": null, "refusal": null, "role": "assistant", @@ -5073,7 +601,33 @@ "choices": [ { "delta": { - "content": " ranges", + "content": " named", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " Tokyo", "function_call": null, "refusal": null, "role": "assistant", @@ -5151,7 +705,7 @@ "choices": [ { "delta": { - "content": " actual", + "content": " each", "function_call": null, "refusal": null, "role": "assistant", @@ -5177,7 +731,7 @@ "choices": [ { "delta": { - "content": " weather", + "content": " one", "function_call": null, "refusal": null, "role": "assistant", @@ -5203,7 +757,7 @@ "choices": [ { "delta": { - "content": " conditions", + "content": " has", "function_call": null, "refusal": null, "role": "assistant", @@ -5229,7 +783,7 @@ "choices": [ { "delta": { - "content": " may", + "content": " a", "function_call": null, "refusal": null, "role": "assistant", @@ -5255,7 +809,7 @@ "choices": [ { "delta": { - "content": " vary", + "content": " different", "function_call": null, "refusal": null, "role": "assistant", @@ -5281,7 +835,7 @@ "choices": [ { "delta": { - "content": " from", + "content": " climate", "function_call": null, "refusal": null, "role": "assistant", @@ -5307,7 +861,319 @@ "choices": [ { "delta": { - "content": " year", + "content": ".\n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": "Could", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " please", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + 
"finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " tell", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " me", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " which", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " city", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " in", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " Japan", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + 
"refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " are", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " referring", "function_call": null, "refusal": null, "role": "assistant", @@ -5359,7 +1225,917 @@ "choices": [ { "delta": { - "content": " year", + "content": "?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " Tokyo", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " itself", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " not", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + 
"usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " always", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " a", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " good", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " choice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " as", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " it", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " often", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": 
"chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " gets", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " confused", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " with", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " actual", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " name", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + 
} + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " large", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " and", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " populous", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " area", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + 
"tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " K", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": "anto", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " region", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " which", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " includes", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " 
larger", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " areas", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " surrounding", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " Tokyo", "function_call": null, "refusal": null, "role": "assistant", @@ -5404,6 +2180,708 @@ "usage": null } }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " \n\n", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": "If", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " that", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " does", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": 
null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " not", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " give", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " us", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " enough", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " grounds", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " then", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " could", + "function_call": null, + "refusal": null, + "role": "assistant", + 
"tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " provide", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " also", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " what", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " approximate", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " month", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + 
"content": " you", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " want", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " the", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " information", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " about", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " (", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": "for", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + 
"__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " seasonal", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": " changes", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-6b207540bc67", + "choices": [ + { + "delta": { + "content": ")?", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, { "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": { @@ -5441,9 +2919,9 @@ "service_tier": null, "system_fingerprint": "fp_ollama", "usage": { - "completion_tokens": 208, + "completion_tokens": 111, "prompt_tokens": 32, - "total_tokens": 240, + "total_tokens": 143, "completion_tokens_details": null, "prompt_tokens_details": null } diff --git a/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json b/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json index 41c4f97ae..a178476e1 100644 --- a/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json +++ b/tests/integration/agents/recordings/6da760645fe224ace4ab628e4f647259897598e28037fe5f7c09f6677edd08e9.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json b/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json index dce0c2e4d..7f7bf13ca 100644 --- a/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json +++ b/tests/integration/agents/recordings/7094319e038424fbec54338c397b487c7128fc28534351deb4662fba31043fa4.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json b/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json index d8b125dad..a1464e8c3 100644 --- a/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json +++ b/tests/integration/agents/recordings/74c26f63592ceedb76eb9623fce41773965dd66b569506b5622b1a797c45f8e4.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + 
"id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json b/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json index e11d38095..665e53245 100644 --- a/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json +++ b/tests/integration/agents/recordings/7e0d8c4abe407611ceaa99beea2e9973e2a938cab3db3e1be017bbe8be10edc6.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_rwasjr3y", + "id": "call_zqu5i0ti", "function": { "arguments": "{\"celcius\":null,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/7e794c73bf79604a10482bba03124849cb763c7bb66acf3937b524a539b80366.json b/tests/integration/agents/recordings/7e794c73bf79604a10482bba03124849cb763c7bb66acf3937b524a539b80366.json new file mode 100644 index 000000000..dfae71291 --- /dev/null +++ b/tests/integration/agents/recordings/7e794c73bf79604a10482bba03124849cb763c7bb66acf3937b524a539b80366.json @@ -0,0 +1,59 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_required[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of liquid polyjuice is -100\u00b0C.\n\n\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." 
+ } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-7e794c73bf79", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 0, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 421, + "total_tokens": 423, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json b/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json index c82ea6394..fa03baf5e 100644 --- a/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json +++ b/tests/integration/agents/recordings/868820c8d798c0d16063d1750a65ae2632ecf543ee440e7d87ea16f8e83461a5.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json b/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json index c33ecca7e..c702a53aa 100644 --- a/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json +++ b/tests/integration/agents/recordings/86e2b939aabb9dfe7ec712a6b20a5809d6fb56f8c9f92d93030f57cba51a1fe2.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/8c4ec47152697a5b34e44d75af581efbada34e06f59ddf536149871c64c9a247.json b/tests/integration/agents/recordings/8c4ec47152697a5b34e44d75af581efbada34e06f59ddf536149871c64c9a247.json new file mode 100644 index 000000000..9d391c7c8 --- /dev/null +++ b/tests/integration/agents/recordings/8c4ec47152697a5b34e44d75af581efbada34e06f59ddf536149871c64c9a247.json @@ -0,0 +1,442 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_create_turn_response[ollama/llama3.2:3b-instruct-fp16-client_tools0]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "Call get_boiling_point tool and answer What is the boiling point of polyjuice?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_zqu5i0ti", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":null,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_zqu5i0ti", + "content": "-212" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": 
null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "212", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " degrees", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": " Celsius", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-8c4ec4715269", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json b/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json index b209de507..15f9b0f96 100644 --- a/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json +++ b/tests/integration/agents/recordings/8ed0947593196c2b2f68035e248c137813e8db50d0d46395ef9ba98636fa5819.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/8fc418c02b8b6fe09238e36fb72c5e7fc9d41fdaa3eb357f518e0fcaec5ea1e2.json b/tests/integration/agents/recordings/8fc418c02b8b6fe09238e36fb72c5e7fc9d41fdaa3eb357f518e0fcaec5ea1e2.json new file mode 100644 index 000000000..70d92b2bf --- /dev/null +++ b/tests/integration/agents/recordings/8fc418c02b8b6fe09238e36fb72c5e7fc9d41fdaa3eb357f518e0fcaec5ea1e2.json @@ -0,0 +1,59 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_get_boiling_point[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of liquid polyjuice is -100\u00b0C.\n\n\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." 
+ } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-8fc418c02b8b", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 0, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 421, + "total_tokens": 423, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json b/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json index 07b7f8331..16078a8c2 100644 --- a/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json +++ b/tests/integration/agents/recordings/901956b3a51b792f2506d603489af51636b480db9cc520614ee4886418776237.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json b/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json index aeb1fe320..ec3117ee3 100644 --- a/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json +++ b/tests/integration/agents/recordings/958f9b74e98bcf41e4988db8ad15494b8fe0ff707261108305353e4ad980195f.json @@ -1510,5 +1510,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json b/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json index 93155e18c..4d8a2a9ce 100644 --- a/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json +++ b/tests/integration/agents/recordings/96623a251d6e51ee6ba21c53ca111d4aa54882a124d783a8096fd88adf481065.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json b/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json index 1903e3d19..5200b2e65 100644 --- a/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json +++ b/tests/integration/agents/recordings/a702e4bf918e94acd0d76ed753c120a4704dde82acf5ae198982fd62bd103279.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json b/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json index d6ec4ea4b..52d599fe0 100644 --- 
a/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json +++ b/tests/integration/agents/recordings/ad3f6a2b4031bcd38026c3c50617851f102c12946164a563584e6316bd1b6228.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/b3c24a0ab429fb3d7e3680a2a689a8eddb2c2aaf826b513fec55dcd70cdf35ea.json b/tests/integration/agents/recordings/b3c24a0ab429fb3d7e3680a2a689a8eddb2c2aaf826b513fec55dcd70cdf35ea.json new file mode 100644 index 000000000..15a721ef9 --- /dev/null +++ b/tests/integration/agents/recordings/b3c24a0ab429fb3d7e3680a2a689a8eddb2c2aaf826b513fec55dcd70cdf35ea.json @@ -0,0 +1,260 @@ +{ + "test_id": "tests/integration/agents/test_openai_responses.py::test_response_with_instructions[txt=ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "stream": true, + "stream_options": { + "include_usage": true + } + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " capital", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " France", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": 
"rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": " Paris", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b3c24a0ab429", + "choices": [], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 8, + "prompt_tokens": 38, + "total_tokens": 46, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/b4a47451a2af579b9dfb4a60bacaee0f274fc53b263c25fe5e9e4bc23739f3db.json b/tests/integration/agents/recordings/b4a47451a2af579b9dfb4a60bacaee0f274fc53b263c25fe5e9e4bc23739f3db.json new file mode 100644 index 000000000..50e14c9fc --- /dev/null +++ b/tests/integration/agents/recordings/b4a47451a2af579b9dfb4a60bacaee0f274fc53b263c25fe5e9e4bc23739f3db.json @@ -0,0 +1,442 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_tool_choice_required[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant" + }, + { + "role": "user", + "content": "What is the boiling point of the liquid polyjuice in celsius?" 
+ }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_z1rt0qb1", + "type": "function", + "function": { + "name": "get_boiling_point", + "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_z1rt0qb1", + "content": "-100" + } + ], + "max_tokens": 512, + "stream": true, + "temperature": 0.0001, + "tool_choice": "required", + "tools": [ + { + "type": "function", + "function": { + "name": "get_boiling_point", + "description": "Returns the boiling point of a liquid in Celcius or Fahrenheit.", + "parameters": { + "type": "object", + "properties": { + "liquid_name": { + "type": "string", + "description": "The name of the liquid" + }, + "celcius": { + "type": "boolean", + "description": "Whether to return the boiling point in Celcius" + } + }, + "required": [ + "liquid_name" + ] + } + } + } + ], + "top_p": 0.9 + }, + "endpoint": "/v1/chat/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "The", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " boiling", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " point", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " of", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " liquid", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + 
"service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " poly", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "ju", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "ice", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " is", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": " -", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "100", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "\u00b0C", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": 
"llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": ".", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", + "__data__": { + "id": "rec-b4a47451a2af", + "choices": [ + { + "delta": { + "content": "", + "function_call": null, + "refusal": null, + "role": "assistant", + "tool_calls": null + }, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 0, + "model": "llama3.2:3b-instruct-fp16", + "object": "chat.completion.chunk", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json b/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json index c7ecef75f..4527ab5cd 100644 --- a/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json +++ b/tests/integration/agents/recordings/bcb50763cac6527944653e77df1f2cc9e9481ffae9cff5693b2f669270c9c0a7.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_qryqpevz", + "id": "call_9x4z21g1", "function": { "arguments": "{\"celcius\":\"true\",\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json b/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json index e3f54171f..a1332fddb 100644 --- a/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json +++ b/tests/integration/agents/recordings/c97c102959ea8c64a43e4c752bf5e979c7a298fdbeedec153954ce817da7e3e7.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json b/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json index 145596d38..64b28de5c 100644 --- a/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json +++ b/tests/integration/agents/recordings/ca95f47c2896a7ce1536a3cf1a78170e073e49e478658a4b098b3581a703e843.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json b/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json index a333490a4..ae1be7520 100644 --- a/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json 
+++ b/tests/integration/agents/recordings/d35fc2ef48595f5d0afe8fb617c8df864a155017e8d4c5d2e2b2c51e9cfaed5e.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/da6fc54bb65dd1f83e577109b8228b644e76999ebb2fe8f09bead3dee56a6046.json b/tests/integration/agents/recordings/da6fc54bb65dd1f83e577109b8228b644e76999ebb2fe8f09bead3dee56a6046.json new file mode 100644 index 000000000..f17ae2ae3 --- /dev/null +++ b/tests/integration/agents/recordings/da6fc54bb65dd1f83e577109b8228b644e76999ebb2fe8f09bead3dee56a6046.json @@ -0,0 +1,59 @@ +{ + "test_id": "tests/integration/agents/test_agents.py::test_custom_tool[ollama/llama3.2:3b-instruct-fp16]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/chat/completions", + "headers": {}, + "body": { + "model": "llama-guard3:1b", + "messages": [ + { + "role": "user", + "content": "Task: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n\n\n\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of liquid polyjuice is -100\u00b0C.\n\n\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories." 
+ } + ], + "stream": false, + "temperature": 0.0 + }, + "endpoint": "/v1/chat/completions", + "model": "llama-guard3:1b" + }, + "response": { + "body": { + "__type__": "openai.types.chat.chat_completion.ChatCompletion", + "__data__": { + "id": "rec-da6fc54bb65d", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "safe", + "refusal": null, + "role": "assistant", + "annotations": null, + "audio": null, + "function_call": null, + "tool_calls": null + } + } + ], + "created": 0, + "model": "llama-guard3:1b", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 2, + "prompt_tokens": 421, + "total_tokens": 423, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json b/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json index da06f3968..55e71cf27 100644 --- a/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json +++ b/tests/integration/agents/recordings/db5c89b87eba0d129ad9ed17306d4016aeeaf2bbeeaa5643d9620f5ea484430e.json @@ -71,7 +71,7 @@ "tool_calls": [ { "index": 0, - "id": "call_ur5tbdbt", + "id": "call_5qverjg6", "function": { "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -121,5 +121,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json b/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json index cb2afc5ed..06d8a4305 100644 --- a/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json +++ b/tests/integration/agents/recordings/ed76dd5fdf892c9cc959b2d301a256f81c43a906a0a56684ca97e848f8d6a94c.json @@ -54,5 +54,6 @@ } }, "is_streaming": false - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json b/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json index 2e1e9f4e5..dbb70df6c 100644 --- a/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json +++ b/tests/integration/agents/recordings/f85c3c14185386eecd4939eeb6b3a3cee734d69beb7cd6d13a3d3c2c64eca734.json @@ -66,7 +66,7 @@ "tool_calls": [ { "index": 0, - "id": "call_rq1pcgq7", + "id": "call_z1rt0qb1", "function": { "arguments": "{\"celcius\":true,\"liquid_name\":\"polyjuice\"}", "name": "get_boiling_point" @@ -116,5 +116,6 @@ } ], "is_streaming": true - } + }, + "id_normalization_mapping": {} } diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py index 675e2b904..d413d5201 100644 --- a/tests/integration/agents/test_openai_responses.py +++ b/tests/integration/agents/test_openai_responses.py @@ -466,3 +466,53 @@ def test_guardrails_with_tools(compat_client, text_model_id): # Response should be either a function call or a message output_type = response.output[0].type assert output_type in ["function_call", "message"] + + +def 
test_response_with_instructions(openai_client, client_with_models, text_model_id):
+    """Test the instructions parameter in the responses object."""
+    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+    client = openai_client
+
+    messages = [
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ]
+
+    # First, create a response without the instructions parameter
+    response_w_o_instructions = client.responses.create(
+        model=text_model_id,
+        input=messages,
+        stream=False,
+    )
+
+    # Verify we have None in the instructions field
+    assert response_w_o_instructions.instructions is None
+
+    # Next, create a response and pass the instructions parameter
+    instructions = "You are a helpful assistant."
+    response_with_instructions = client.responses.create(
+        model=text_model_id,
+        instructions=instructions,
+        input=messages,
+        stream=False,
+    )
+
+    # Verify we have a valid instructions field
+    assert response_with_instructions.instructions == instructions
+
+    # Finally, test the instructions parameter with a previous response id
+    instructions2 = "You are a helpful assistant and speak in pirate language."
+    response_with_instructions2 = client.responses.create(
+        model=text_model_id,
+        instructions=instructions2,
+        input=messages,
+        previous_response_id=response_with_instructions.id,
+        stream=False,
+    )
+
+    # Verify that instructions from the previous response were not carried over to the next response
+    assert response_with_instructions2.instructions == instructions2
diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-abd54ea0.json b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-abd54ea0.json
new file mode 100644
index 000000000..77e244a01
--- /dev/null
+++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-abd54ea0.json
@@ -0,0 +1,44 @@
+{
+  "test_id": null,
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "llama-guard3:1b",
+          "created": 1753937098,
+          "object": "model",
+          "owned_by": "library"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "all-minilm:l6-v2",
+          "created": 1753936935,
+          "object": "model",
+          "owned_by": "library"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "llama3.2:3b-instruct-fp16",
+          "created": 1753936925,
+          "object": "model",
+          "owned_by": "library"
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index e93668a62..54c1820fb 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -814,6 +814,69 @@ async def test_create_openai_response_with_instructions_and_previous_response(
     assert sent_messages[3].content == "Which is the largest?"
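Together, the integration test above and the unit test that follows pin down the contract: `instructions` is injected as a system message for the current request only, and chaining via `previous_response_id` never inherits the earlier system prompt. A minimal client-side sketch of that behavior, assuming a Llama Stack deployment exposing the OpenAI-compatible Responses API (the base URL, API key, and model id below are illustrative, not part of this patch):

```python
from openai import OpenAI

# Illustrative endpoint and credentials; point these at your own deployment.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
MODEL = "meta-llama/Llama-3.1-8B-Instruct"  # assumed model id

first = client.responses.create(
    model=MODEL,
    instructions="You are a helpful assistant.",
    input="Name some towns in Ireland",
)
# The response object echoes back the instructions used for this turn.
assert first.instructions == "You are a helpful assistant."

second = client.responses.create(
    model=MODEL,
    instructions="You are a geography expert. Provide concise answers.",
    input="Which is the largest?",
    previous_response_id=first.id,
)
# Only the new instructions apply; the first turn's system prompt is not carried over.
assert second.instructions == "You are a geography expert. Provide concise answers."
```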
+async def test_create_openai_response_with_previous_response_instructions( + openai_responses_impl, mock_responses_store, mock_inference_api +): + """Test prepending instructions and previous response with instructions.""" + + input_item_message = OpenAIResponseMessage( + id="123", + content="Name some towns in Ireland", + role="user", + ) + response_output_message = OpenAIResponseMessage( + id="123", + content="Galway, Longford, Sligo", + status="completed", + role="assistant", + ) + response = _OpenAIResponseObjectWithInputAndMessages( + created_at=1, + id="resp_123", + model="fake_model", + output=[response_output_message], + status="completed", + text=OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), + input=[input_item_message], + messages=[ + OpenAIUserMessageParam(content="Name some towns in Ireland"), + OpenAIAssistantMessageParam(content="Galway, Longford, Sligo"), + ], + instructions="You are a helpful assistant.", + ) + mock_responses_store.get_response_object.return_value = response + + model = "meta-llama/Llama-3.1-8B-Instruct" + instructions = "You are a geography expert. Provide concise answers." + + mock_inference_api.openai_chat_completion.return_value = fake_stream() + + # Execute + await openai_responses_impl.create_openai_response( + input="Which is the largest?", model=model, instructions=instructions, previous_response_id="123" + ) + + # Verify + mock_inference_api.openai_chat_completion.assert_called_once() + call_args = mock_inference_api.openai_chat_completion.call_args + params = call_args.args[0] + sent_messages = params.messages + + # Check that instructions were prepended as a system message + # and that the previous response instructions were not carried over + assert len(sent_messages) == 4, sent_messages + assert sent_messages[0].role == "system" + assert sent_messages[0].content == instructions + + # Check the rest of the messages were converted correctly + assert sent_messages[1].role == "user" + assert sent_messages[1].content == "Name some towns in Ireland" + assert sent_messages[2].role == "assistant" + assert sent_messages[2].content == "Galway, Longford, Sligo" + assert sent_messages[3].role == "user" + assert sent_messages[3].content == "Which is the largest?" + + async def test_list_openai_response_input_items_delegation(openai_responses_impl, mock_responses_store): """Test that list_openai_response_input_items properly delegates to responses_store with correct parameters.""" # Setup From 2c43285e226ac5e9f6995fbd0725109e99b28b6b Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 20 Oct 2025 13:20:09 -0700 Subject: [PATCH 2/3] feat(stores)!: use backend storage references instead of configs (#3697) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **This PR changes configurations in a backward incompatible way.** Run configs today repeat full SQLite/Postgres snippets everywhere a store is needed, which means duplicated credentials, extra connection pools, and lots of drift between files. This PR introduces named storage backends so the stack and providers can share a single catalog and reference those backends by name. ## Key Changes - Add `storage.backends` to `StackRunConfig`, register each KV/SQL backend once at startup, and validate that references point to the right family. - Move server stores under `storage.stores` with lightweight references (backend + namespace/table) instead of full configs. 
- Update every provider/config/doc to use the new reference style; docs/codegen now surface the simplified YAML. ## Migration Before: ```yaml metadata_store: type: sqlite db_path: ~/.llama/distributions/foo/registry.db inference_store: type: postgres host: ${env.POSTGRES_HOST} port: ${env.POSTGRES_PORT} db: ${env.POSTGRES_DB} user: ${env.POSTGRES_USER} password: ${env.POSTGRES_PASSWORD} conversations_store: type: postgres host: ${env.POSTGRES_HOST} port: ${env.POSTGRES_PORT} db: ${env.POSTGRES_DB} user: ${env.POSTGRES_USER} password: ${env.POSTGRES_PASSWORD} ``` After: ```yaml storage: backends: kv_default: type: kv_sqlite db_path: ~/.llama/distributions/foo/kvstore.db sql_default: type: sql_postgres host: ${env.POSTGRES_HOST} port: ${env.POSTGRES_PORT} db: ${env.POSTGRES_DB} user: ${env.POSTGRES_USER} password: ${env.POSTGRES_PASSWORD} stores: metadata: backend: kv_default namespace: registry inference: backend: sql_default table_name: inference_store max_write_queue_size: 10000 num_writers: 4 conversations: backend: sql_default table_name: openai_conversations ``` Provider configs follow the same pattern—for example, a Chroma vector adapter switches from: ```yaml providers: vector_io: - provider_id: chromadb provider_type: remote::chromadb config: url: ${env.CHROMADB_URL} kvstore: type: sqlite db_path: ~/.llama/distributions/foo/chroma.db ``` to: ```yaml providers: vector_io: - provider_id: chromadb provider_type: remote::chromadb config: url: ${env.CHROMADB_URL} persistence: backend: kv_default namespace: vector_io::chroma_remote ``` Once the backends are declared, everything else just points at them, so rotating credentials or swapping to Postgres happens in one place and the stack reuses a single connection pool. --- .../actions/run-and-record-tests/action.yml | 5 +- .github/workflows/integration-auth-tests.yml | 18 ++ .../k8s-benchmark/stack-configmap.yaml | 40 ++- .../k8s-benchmark/stack_run_config.yaml | 39 ++- docs/docs/distributions/configuration.mdx | 30 +- .../distributions/k8s/stack-configmap.yaml | 201 +++++++++---- .../distributions/k8s/stack_run_config.yaml | 39 ++- .../agents/inline_meta-reference.mdx | 18 +- .../providers/batches/inline_reference.mdx | 6 +- .../providers/datasetio/inline_localfs.mdx | 6 +- .../datasetio/remote_huggingface.mdx | 6 +- .../providers/eval/inline_meta-reference.mdx | 6 +- docs/docs/providers/files/inline_localfs.mdx | 6 +- docs/docs/providers/files/remote_s3.mdx | 6 +- .../providers/vector_io/inline_chromadb.mdx | 8 +- .../docs/providers/vector_io/inline_faiss.mdx | 8 +- .../vector_io/inline_meta-reference.mdx | 8 +- .../providers/vector_io/inline_milvus.mdx | 8 +- .../providers/vector_io/inline_qdrant.mdx | 8 +- .../providers/vector_io/inline_sqlite-vec.mdx | 8 +- .../providers/vector_io/inline_sqlite_vec.mdx | 8 +- .../providers/vector_io/remote_chromadb.mdx | 8 +- .../providers/vector_io/remote_milvus.mdx | 8 +- .../providers/vector_io/remote_pgvector.mdx | 8 +- .../providers/vector_io/remote_qdrant.mdx | 8 +- .../providers/vector_io/remote_weaviate.mdx | 8 +- llama_stack/cli/stack/_build.py | 43 ++- llama_stack/cli/stack/utils.py | 23 +- llama_stack/core/configure.py | 31 ++ .../core/conversations/conversations.py | 26 +- llama_stack/core/datatypes.py | 86 +++--- llama_stack/core/prompts/prompts.py | 13 +- llama_stack/core/routers/__init__.py | 13 +- llama_stack/core/server/quota.py | 18 +- llama_stack/core/stack.py | 46 ++- llama_stack/core/storage/__init__.py | 5 + llama_stack/core/storage/datatypes.py | 283 ++++++++++++++++++ 
llama_stack/core/store/registry.py | 13 +- llama_stack/distributions/ci-tests/run.yaml | 94 +++--- .../distributions/dell/run-with-safety.yaml | 62 ++-- llama_stack/distributions/dell/run.yaml | 62 ++-- .../meta-reference-gpu/run-with-safety.yaml | 62 ++-- .../distributions/meta-reference-gpu/run.yaml | 62 ++-- .../distributions/nvidia/run-with-safety.yaml | 58 ++-- llama_stack/distributions/nvidia/run.yaml | 54 ++-- .../distributions/open-benchmark/run.yaml | 74 +++-- .../postgres-demo/postgres_demo.py | 19 +- .../distributions/postgres-demo/run.yaml | 76 ++--- .../distributions/starter-gpu/run.yaml | 94 +++--- llama_stack/distributions/starter/run.yaml | 94 +++--- llama_stack/distributions/template.py | 76 +++-- llama_stack/distributions/watsonx/run.yaml | 66 ++-- .../inline/agents/meta_reference/agents.py | 4 +- .../inline/agents/meta_reference/config.py | 32 +- .../inline/batches/reference/config.py | 12 +- .../inline/datasetio/localfs/config.py | 15 +- .../inline/eval/meta_reference/config.py | 15 +- .../providers/inline/files/localfs/config.py | 12 +- .../inline/vector_io/chroma/config.py | 12 +- .../inline/vector_io/faiss/config.py | 15 +- .../providers/inline/vector_io/faiss/faiss.py | 2 +- .../inline/vector_io/milvus/config.py | 15 +- .../inline/vector_io/qdrant/config.py | 14 +- .../inline/vector_io/sqlite_vec/config.py | 15 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 2 +- .../remote/datasetio/huggingface/config.py | 15 +- .../providers/remote/files/s3/config.py | 12 +- .../remote/vector_io/chroma/chroma.py | 2 +- .../remote/vector_io/chroma/config.py | 12 +- .../remote/vector_io/milvus/config.py | 12 +- .../remote/vector_io/milvus/milvus.py | 2 +- .../remote/vector_io/pgvector/config.py | 17 +- .../remote/vector_io/pgvector/pgvector.py | 2 +- .../remote/vector_io/qdrant/config.py | 15 +- .../remote/vector_io/qdrant/qdrant.py | 4 +- .../remote/vector_io/weaviate/config.py | 17 +- .../remote/vector_io/weaviate/weaviate.py | 4 +- .../utils/inference/inference_store.py | 36 +-- llama_stack/providers/utils/kvstore/config.py | 151 +--------- .../providers/utils/kvstore/kvstore.py | 41 ++- .../utils/responses/responses_store.py | 45 ++- .../utils/sqlstore/authorized_sqlstore.py | 18 +- .../utils/sqlstore/sqlalchemy_sqlstore.py | 2 +- .../providers/utils/sqlstore/sqlstore.py | 114 +++---- scripts/docker.sh | 2 +- tests/external/run-byoa.yaml | 18 ++ tests/integration/fixtures/common.py | 2 +- .../sqlstore/test_authorized_sqlstore.py | 13 +- .../test_persistence_integration.py | 71 +++++ tests/unit/cli/test_stack_config.py | 42 +++ .../unit/conversations/test_conversations.py | 35 ++- tests/unit/core/test_storage_references.py | 84 ++++++ tests/unit/distribution/test_distribution.py | 79 +++-- tests/unit/files/test_files.py | 8 +- tests/unit/prompts/prompts/conftest.py | 33 +- .../agent/test_meta_reference_agent.py | 35 ++- .../meta_reference/test_openai_responses.py | 8 +- tests/unit/providers/batches/conftest.py | 8 +- tests/unit/providers/files/conftest.py | 7 +- tests/unit/providers/vector_io/conftest.py | 14 +- tests/unit/registry/test_registry.py | 16 +- tests/unit/server/test_quota.py | 9 +- tests/unit/server/test_resolver.py | 48 ++- .../utils/inference/test_inference_store.py | 260 ++++++++-------- .../utils/responses/test_responses_store.py | 29 +- 105 files changed, 2290 insertions(+), 1292 deletions(-) create mode 100644 llama_stack/core/storage/__init__.py create mode 100644 llama_stack/core/storage/datatypes.py create mode 100644 
tests/integration/test_persistence_integration.py create mode 100644 tests/unit/core/test_storage_references.py diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml index a5aa31af4..3929df09c 100644 --- a/.github/actions/run-and-record-tests/action.yml +++ b/.github/actions/run-and-record-tests/action.yml @@ -82,11 +82,14 @@ runs: echo "No recording changes" fi - - name: Write inference logs to file + - name: Write docker logs to file if: ${{ always() }} shell: bash run: | sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true + distro_name=$(echo "${{ inputs.stack-config }}" | sed 's/^docker://' | sed 's/^server://') + stack_container_name="llama-stack-test-$distro_name" + sudo docker logs $stack_container_name > docker-${distro_name}-${{ inputs.inference-mode }}.log || true - name: Upload logs if: ${{ always() }} diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index ea3ff2b64..30a8063ea 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -73,6 +73,24 @@ jobs: image_name: kube apis: [] providers: {} + storage: + backends: + kv_default: + type: kv_sqlite + db_path: $run_dir/kvstore.db + sql_default: + type: sql_sqlite + db_path: $run_dir/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + conversations: + table_name: openai_conversations + backend: sql_default server: port: 8321 EOF diff --git a/benchmarking/k8s-benchmark/stack-configmap.yaml b/benchmarking/k8s-benchmark/stack-configmap.yaml index bb8a48d65..e1ca170f5 100644 --- a/benchmarking/k8s-benchmark/stack-configmap.yaml +++ b/benchmarking/k8s-benchmark/stack-configmap.yaml @@ -98,21 +98,30 @@ data: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} - metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore - inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + references: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: embedding_dimension: 768 @@ -137,5 +146,4 @@ data: port: 8323 kind: ConfigMap metadata: - creationTimestamp: null name: llama-stack-config diff --git a/benchmarking/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml index e2fbfd7a4..2ccaa21aa 100644 --- a/benchmarking/k8s-benchmark/stack_run_config.yaml +++ 
b/benchmarking/k8s-benchmark/stack_run_config.yaml @@ -95,21 +95,30 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + references: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: embedding_dimension: 768 diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx index 81243c97b..bf3156865 100644 --- a/docs/docs/distributions/configuration.mdx +++ b/docs/docs/distributions/configuration.mdx @@ -44,18 +44,32 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db + persistence: + agent_state: + backend: kv_default + namespace: agents + responses: + backend: sql_default + table_name: responses telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: {} -metadata_store: - namespace: null - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db + references: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/docs/docs/distributions/k8s/stack-configmap.yaml b/docs/docs/distributions/k8s/stack-configmap.yaml index 3dbb0da97..c71ab05d8 100644 --- a/docs/docs/distributions/k8s/stack-configmap.yaml +++ b/docs/docs/distributions/k8s/stack-configmap.yaml @@ -1,56 +1,155 @@ apiVersion: v1 data: - stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n- - inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n - \ inference:\n - provider_id: vllm-inference\n provider_type: remote::vllm\n - \ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens: - ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify: - ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type: - remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n - \ 
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n - \ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n - \ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n - \ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n - \ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n - \ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n - \ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n - \ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id: - meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir: - ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n - \ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db - \ \n safety:\n - provider_id: llama-guard\n provider_type: inline::llama-guard\n - \ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n - \ provider_type: inline::meta-reference\n config:\n persistence_store:\n - \ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port: - ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user: - ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n - \ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n - \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n - \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n - \ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n - \ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks: - ${env.TELEMETRY_SINKS:=console}\n tool_runtime:\n - provider_id: brave-search\n - \ provider_type: remote::brave-search\n config:\n api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n - \ max_results: 3\n - provider_id: tavily-search\n provider_type: remote::tavily-search\n - \ config:\n api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n max_results: - 3\n - provider_id: rag-runtime\n provider_type: inline::rag-runtime\n config: - {}\n - provider_id: model-context-protocol\n provider_type: remote::model-context-protocol\n - \ config: {}\nmetadata_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n - \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user: - ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n - \ table_name: llamastack_kvstore\ninference_store:\n type: postgres\n host: - ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n - \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n- - metadata:\n embedding_dimension: 384\n model_id: all-MiniLM-L6-v2\n provider_id: - sentence-transformers\n model_type: embedding\n- metadata: {}\n model_id: ${env.INFERENCE_MODEL}\n - \ provider_id: vllm-inference\n model_type: llm\n- metadata: {}\n model_id: - ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n provider_id: vllm-safety\n - \ model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs: - []\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id: - builtin::websearch\n provider_id: tavily-search\n- toolgroup_id: builtin::rag\n - \ provider_id: rag-runtime\nserver:\n port: 8321\n auth:\n provider_config:\n - \ type: 
github_token\n" + stack_run_config.yaml: | + version: '2' + image_name: kubernetes-demo + apis: + - agents + - inference + - files + - safety + - telemetry + - tool_runtime + - vector_io + providers: + inference: + - provider_id: vllm-inference + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: vllm-safety + provider_type: remote::vllm + config: + url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + kvstore: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + responses_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:+} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:+} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + references: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store + models: + - metadata: + embedding_dimension: 768 + 
model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: vllm-inference + model_type: llm + - metadata: {} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} + provider_id: vllm-safety + model_type: llm + shields: + - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime + server: + port: 8321 + auth: + provider_config: + type: github_token kind: ConfigMap metadata: - creationTimestamp: null name: llama-stack-config diff --git a/docs/docs/distributions/k8s/stack_run_config.yaml b/docs/docs/distributions/k8s/stack_run_config.yaml index ee28a1ea8..863565fdf 100644 --- a/docs/docs/distributions/k8s/stack_run_config.yaml +++ b/docs/docs/distributions/k8s/stack_run_config.yaml @@ -93,21 +93,30 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + references: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: embedding_dimension: 768 diff --git a/docs/docs/providers/agents/inline_meta-reference.mdx b/docs/docs/providers/agents/inline_meta-reference.mdx index fd961745f..fac9b8406 100644 --- a/docs/docs/providers/agents/inline_meta-reference.mdx +++ b/docs/docs/providers/agents/inline_meta-reference.mdx @@ -14,16 +14,18 @@ Meta's reference implementation of an agent system that can use tools, access ve | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence_store` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | -| `responses_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | | +| `persistence` | `` | No | | | ## Sample Configuration ```yaml -persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db -responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db +persistence: + agent_state: + namespace: agents + backend: kv_default + 
responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 ``` diff --git a/docs/docs/providers/batches/inline_reference.mdx b/docs/docs/providers/batches/inline_reference.mdx index f43800555..45304fbb1 100644 --- a/docs/docs/providers/batches/inline_reference.mdx +++ b/docs/docs/providers/batches/inline_reference.mdx @@ -14,7 +14,7 @@ Reference implementation of batches API with KVStore persistence. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Configuration for the key-value store backend. | +| `kvstore` | `` | No | | Configuration for the key-value store backend. | | `max_concurrent_batches` | `` | No | 1 | Maximum number of concurrent batches to process simultaneously. | | `max_concurrent_requests_per_batch` | `` | No | 10 | Maximum number of concurrent requests to process per batch. | @@ -22,6 +22,6 @@ Reference implementation of batches API with KVStore persistence. ```yaml kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/batches.db + namespace: batches + backend: kv_default ``` diff --git a/docs/docs/providers/datasetio/inline_localfs.mdx b/docs/docs/providers/datasetio/inline_localfs.mdx index b02a3a3bd..a9363376c 100644 --- a/docs/docs/providers/datasetio/inline_localfs.mdx +++ b/docs/docs/providers/datasetio/inline_localfs.mdx @@ -14,12 +14,12 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `kvstore` | `` | No | | | ## Sample Configuration ```yaml kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default ``` diff --git a/docs/docs/providers/datasetio/remote_huggingface.mdx b/docs/docs/providers/datasetio/remote_huggingface.mdx index 82597d999..de3ffaaa6 100644 --- a/docs/docs/providers/datasetio/remote_huggingface.mdx +++ b/docs/docs/providers/datasetio/remote_huggingface.mdx @@ -14,12 +14,12 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `kvstore` | `` | No | | | ## Sample Configuration ```yaml kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default ``` diff --git a/docs/docs/providers/eval/inline_meta-reference.mdx b/docs/docs/providers/eval/inline_meta-reference.mdx index b0eb589e0..2c86c18c9 100644 --- a/docs/docs/providers/eval/inline_meta-reference.mdx +++ b/docs/docs/providers/eval/inline_meta-reference.mdx @@ -14,12 +14,12 @@ Meta's reference implementation of evaluation tasks with support for multiple la | Field | Type | Required | Default | Description | 
|-------|------|----------|---------|-------------| -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `kvstore` | `` | No | | | ## Sample Configuration ```yaml kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db + namespace: eval + backend: kv_default ``` diff --git a/docs/docs/providers/files/inline_localfs.mdx b/docs/docs/providers/files/inline_localfs.mdx index 86d141f93..bff0c4eb9 100644 --- a/docs/docs/providers/files/inline_localfs.mdx +++ b/docs/docs/providers/files/inline_localfs.mdx @@ -15,7 +15,7 @@ Local filesystem-based file storage provider for managing files and documents lo | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `storage_dir` | `` | No | | Directory to store uploaded files | -| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata | +| `metadata_store` | `` | No | | SQL store configuration for file metadata | | `ttl_secs` | `` | No | 31536000 | | ## Sample Configuration @@ -23,6 +23,6 @@ Local filesystem-based file storage provider for managing files and documents lo ```yaml storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db + table_name: files_metadata + backend: sql_default ``` diff --git a/docs/docs/providers/files/remote_s3.mdx b/docs/docs/providers/files/remote_s3.mdx index 353cedbfb..65cd545c5 100644 --- a/docs/docs/providers/files/remote_s3.mdx +++ b/docs/docs/providers/files/remote_s3.mdx @@ -20,7 +20,7 @@ AWS S3-based file storage provider for scalable cloud file management with metad | `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) | | `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) 
| | `auto_create_bucket` | `` | No | False | Automatically create the S3 bucket if it doesn't exist | -| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata | +| `metadata_store` | `` | No | | SQL store configuration for file metadata | ## Sample Configuration @@ -32,6 +32,6 @@ aws_secret_access_key: ${env.AWS_SECRET_ACCESS_KEY:=} endpoint_url: ${env.S3_ENDPOINT_URL:=} auto_create_bucket: ${env.S3_AUTO_CREATE_BUCKET:=false} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/s3_files_metadata.db + table_name: s3_files_metadata + backend: sql_default ``` diff --git a/docs/docs/providers/vector_io/inline_chromadb.mdx b/docs/docs/providers/vector_io/inline_chromadb.mdx index a1858eacc..0be5cd5b3 100644 --- a/docs/docs/providers/vector_io/inline_chromadb.mdx +++ b/docs/docs/providers/vector_io/inline_chromadb.mdx @@ -79,13 +79,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `` | No | | | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | +| `persistence` | `` | No | | Config for KV store backend | ## Sample Configuration ```yaml db_path: ${env.CHROMADB_PATH} -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_inline_registry.db +persistence: + namespace: vector_io::chroma + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/inline_faiss.mdx b/docs/docs/providers/vector_io/inline_faiss.mdx index 03bc2a928..3a1fba055 100644 --- a/docs/docs/providers/vector_io/inline_faiss.mdx +++ b/docs/docs/providers/vector_io/inline_faiss.mdx @@ -95,12 +95,12 @@ more details about Faiss in general. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `persistence` | `` | No | | | ## Sample Configuration ```yaml -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db +persistence: + namespace: vector_io::faiss + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/inline_meta-reference.mdx b/docs/docs/providers/vector_io/inline_meta-reference.mdx index bcad86750..17fd40cf5 100644 --- a/docs/docs/providers/vector_io/inline_meta-reference.mdx +++ b/docs/docs/providers/vector_io/inline_meta-reference.mdx @@ -14,14 +14,14 @@ Meta's reference implementation of a vector database. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `persistence` | `` | No | | | ## Sample Configuration ```yaml -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db +persistence: + namespace: vector_io::faiss + backend: kv_default ``` ## Deprecation Notice diff --git a/docs/docs/providers/vector_io/inline_milvus.mdx b/docs/docs/providers/vector_io/inline_milvus.mdx index 7e6f15c81..6063edab1 100644 --- a/docs/docs/providers/vector_io/inline_milvus.mdx +++ b/docs/docs/providers/vector_io/inline_milvus.mdx @@ -17,14 +17,14 @@ Please refer to the remote provider documentation. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `` | No | | | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | +| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | | `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | ## Sample Configuration ```yaml db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db +persistence: + namespace: vector_io::milvus + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/inline_qdrant.mdx b/docs/docs/providers/vector_io/inline_qdrant.mdx index 5c9ab10f2..057d96761 100644 --- a/docs/docs/providers/vector_io/inline_qdrant.mdx +++ b/docs/docs/providers/vector_io/inline_qdrant.mdx @@ -98,13 +98,13 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `path` | `` | No | | | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `persistence` | `` | No | | | ## Sample Configuration ```yaml path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db +persistence: + namespace: vector_io::qdrant + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx index aa6992a56..98a372250 100644 --- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx @@ -408,13 +408,13 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `` | No | | Path to the SQLite database file | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | +| `persistence` | `` | No | | Config for KV store backend (SQLite 
only for now) | ## Sample Configuration ```yaml db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db +persistence: + namespace: vector_io::sqlite_vec + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx index 7f69f617d..67cbd0021 100644 --- a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx @@ -17,15 +17,15 @@ Please refer to the sqlite-vec provider documentation. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `` | No | | Path to the SQLite database file | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | +| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration ```yaml db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db +persistence: + namespace: vector_io::sqlite_vec + backend: kv_default ``` ## Deprecation Notice diff --git a/docs/docs/providers/vector_io/remote_chromadb.mdx b/docs/docs/providers/vector_io/remote_chromadb.mdx index 807771003..2aee3eeca 100644 --- a/docs/docs/providers/vector_io/remote_chromadb.mdx +++ b/docs/docs/providers/vector_io/remote_chromadb.mdx @@ -78,13 +78,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `url` | `str \| None` | No | | | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | +| `persistence` | `` | No | | Config for KV store backend | ## Sample Configuration ```yaml url: ${env.CHROMADB_URL} -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_remote_registry.db +persistence: + namespace: vector_io::chroma_remote + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/remote_milvus.mdx b/docs/docs/providers/vector_io/remote_milvus.mdx index 7f7c08122..bf9935d61 100644 --- a/docs/docs/providers/vector_io/remote_milvus.mdx +++ b/docs/docs/providers/vector_io/remote_milvus.mdx @@ -408,7 +408,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi | `uri` | `` | No | | The URI of the Milvus server | | `token` | `str \| None` | No | | The token of the Milvus server | | `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | +| `persistence` | `` | No | | Config for KV store backend | | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. 
See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | :::note @@ -420,7 +420,7 @@ This configuration class accepts additional fields beyond those listed above. Yo ```yaml uri: ${env.MILVUS_ENDPOINT} token: ${env.MILVUS_TOKEN} -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_remote_registry.db +persistence: + namespace: vector_io::milvus_remote + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/remote_pgvector.mdx b/docs/docs/providers/vector_io/remote_pgvector.mdx index d21810c68..cb70f35d1 100644 --- a/docs/docs/providers/vector_io/remote_pgvector.mdx +++ b/docs/docs/providers/vector_io/remote_pgvector.mdx @@ -218,7 +218,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de | `db` | `str \| None` | No | postgres | | | `user` | `str \| None` | No | postgres | | | `password` | `str \| None` | No | mysecretpassword | | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration @@ -228,7 +228,7 @@ port: ${env.PGVECTOR_PORT:=5432} db: ${env.PGVECTOR_DB} user: ${env.PGVECTOR_USER} password: ${env.PGVECTOR_PASSWORD} -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/pgvector_registry.db +persistence: + namespace: vector_io::pgvector + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/remote_qdrant.mdx b/docs/docs/providers/vector_io/remote_qdrant.mdx index c44a2b937..dff9642b5 100644 --- a/docs/docs/providers/vector_io/remote_qdrant.mdx +++ b/docs/docs/providers/vector_io/remote_qdrant.mdx @@ -26,13 +26,13 @@ Please refer to the inline provider documentation. 
| `prefix` | `str \| None` | No | | | | `timeout` | `int \| None` | No | | | | `host` | `str \| None` | No | | | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `persistence` | `` | No | | | ## Sample Configuration ```yaml api_key: ${env.QDRANT_API_KEY:=} -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db +persistence: + namespace: vector_io::qdrant_remote + backend: kv_default ``` diff --git a/docs/docs/providers/vector_io/remote_weaviate.mdx b/docs/docs/providers/vector_io/remote_weaviate.mdx index 3f1e36422..b809bed2e 100644 --- a/docs/docs/providers/vector_io/remote_weaviate.mdx +++ b/docs/docs/providers/vector_io/remote_weaviate.mdx @@ -75,14 +75,14 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more |-------|------|----------|---------|-------------| | `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance | | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster | -| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration ```yaml weaviate_api_key: null weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} -kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/weaviate_registry.db +persistence: + namespace: vector_io::weaviate + backend: kv_default ``` diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index 471d5cb66..2a30ff394 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -40,12 +40,20 @@ from llama_stack.core.distribution import get_provider_registry from llama_stack.core.external import load_external_apis from llama_stack.core.resolver import InvalidProviderError from llama_stack.core.stack import replace_env_vars +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.exec import formulate_run_args, run_command from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.datatypes import Api -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions" @@ -286,21 +294,42 @@ def _generate_run_config( Generate a run.yaml template file for user to edit from a build.yaml file """ apis = list(build_config.distribution_spec.providers.keys()) + distro_dir = DISTRIBS_BASE_DIR / image_name + storage = StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig( + db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db", + ), + "sql_default": SqliteSqlStoreConfig( + 
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db", + ), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference( + backend="kv_default", + namespace="registry", + ), + inference=InferenceStoreReference( + backend="sql_default", + table_name="inference_store", + ), + conversations=SqlStoreReference( + backend="sql_default", + table_name="openai_conversations", + ), + ), + ) + run_config = StackRunConfig( container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), image_name=image_name, apis=apis, providers={}, + storage=storage, external_providers_dir=build_config.external_providers_dir if build_config.external_providers_dir else EXTERNAL_PROVIDERS_DIR, ) - if not run_config.inference_store: - run_config.inference_store = SqliteSqlStoreConfig( - **SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db" - ) - ) # build providers dict provider_registry = get_provider_registry(build_config) for api in apis: diff --git a/llama_stack/cli/stack/utils.py b/llama_stack/cli/stack/utils.py index 4d4c1b538..cc1ca051b 100644 --- a/llama_stack/cli/stack/utils.py +++ b/llama_stack/cli/stack/utils.py @@ -17,10 +17,19 @@ from llama_stack.core.datatypes import ( BuildConfig, Provider, StackRunConfig, + StorageConfig, ) from llama_stack.core.distribution import get_provider_registry from llama_stack.core.resolver import InvalidProviderError -from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, +) +from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.datatypes import Api @@ -51,11 +60,23 @@ def generate_run_config( Generate a run.yaml template file for user to edit from a build.yaml file """ apis = list(build_config.distribution_spec.providers.keys()) + distro_dir = DISTRIBS_BASE_DIR / image_name run_config = StackRunConfig( container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), image_name=image_name, apis=apis, providers={}, + storage=StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")), + "sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"), + ), + ), external_providers_dir=build_config.external_providers_dir if build_config.external_providers_dir else EXTERNAL_PROVIDERS_DIR, diff --git a/llama_stack/core/configure.py b/llama_stack/core/configure.py index bfa2c6d71..734839ea9 100644 --- a/llama_stack/core/configure.py +++ b/llama_stack/core/configure.py @@ -159,6 +159,37 @@ def upgrade_from_routing_table( config_dict["apis"] = config_dict["apis_to_serve"] config_dict.pop("apis_to_serve", None) + # Add default storage config if not present + if "storage" not in config_dict: + config_dict["storage"] = { + "backends": { + "kv_default": { + "type": "kv_sqlite", + 
"db_path": "~/.llama/kvstore.db", + }, + "sql_default": { + "type": "sql_sqlite", + "db_path": "~/.llama/sql_store.db", + }, + }, + "stores": { + "metadata": { + "namespace": "registry", + "backend": "kv_default", + }, + "inference": { + "table_name": "inference_store", + "backend": "sql_default", + "max_write_queue_size": 10000, + "num_writers": 4, + }, + "conversations": { + "table_name": "openai_conversations", + "backend": "sql_default", + }, + }, + } + return config_dict diff --git a/llama_stack/core/conversations/conversations.py b/llama_stack/core/conversations/conversations.py index d2537c7ee..66880ca36 100644 --- a/llama_stack/core/conversations/conversations.py +++ b/llama_stack/core/conversations/conversations.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import os import secrets import time from typing import Any @@ -21,16 +20,11 @@ from llama_stack.apis.conversations.conversations import ( Conversations, Metadata, ) -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.core.datatypes import AccessRule, StackRunConfig from llama_stack.log import get_logger from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import ( - SqliteSqlStoreConfig, - SqlStoreConfig, - sqlstore_impl, -) +from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl logger = get_logger(name=__name__, category="openai_conversations") @@ -38,13 +32,11 @@ logger = get_logger(name=__name__, category="openai_conversations") class ConversationServiceConfig(BaseModel): """Configuration for the built-in conversation service. 
-    :param conversations_store: SQL store configuration for conversations (defaults to SQLite)
+    :param run_config: Stack run configuration for resolving persistence
     :param policy: Access control rules
     """
 
-    conversations_store: SqlStoreConfig = SqliteSqlStoreConfig(
-        db_path=(DISTRIBS_BASE_DIR / "conversations.db").as_posix()
-    )
+    run_config: StackRunConfig
     policy: list[AccessRule] = []
 
 
@@ -63,14 +55,16 @@ class ConversationServiceImpl(Conversations):
         self.deps = deps
         self.policy = config.policy
 
-        base_sql_store = sqlstore_impl(config.conversations_store)
+        # Use conversations store reference from run config
+        conversations_ref = config.run_config.storage.stores.conversations
+        if not conversations_ref:
+            raise ValueError("storage.stores.conversations must be configured in run config")
+
+        base_sql_store = sqlstore_impl(conversations_ref)
         self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)
 
     async def initialize(self) -> None:
         """Initialize the store and create tables."""
-        if isinstance(self.config.conversations_store, SqliteSqlStoreConfig):
-            os.makedirs(os.path.dirname(self.config.conversations_store.db_path), exist_ok=True)
-
         await self.sql_store.create_table(
             "openai_conversations",
             {
diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py
index 94222d49e..d692da3b3 100644
--- a/llama_stack/core/datatypes.py
+++ b/llama_stack/core/datatypes.py
@@ -26,9 +26,12 @@ from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.core.access_control.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import (
+    KVStoreReference,
+    StorageBackendType,
+    StorageConfig,
+)
 from llama_stack.providers.datatypes import Api, ProviderSpec
-from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
-from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
 
 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
 LLAMA_STACK_RUN_CONFIG_VERSION = 2
@@ -356,7 +359,7 @@ class QuotaPeriod(StrEnum):
 
 
 class QuotaConfig(BaseModel):
-    kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
+    kvstore: KVStoreReference = Field(description="Reference to the KV store backend used for quota tracking")
     anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period")
     authenticated_max_requests: int = Field(
         default=1000, description="Max requests for authenticated clients per period"
@@ -438,18 +441,6 @@ class ServerConfig(BaseModel):
     )
 
 
-class InferenceStoreConfig(BaseModel):
-    sql_store_config: SqlStoreConfig
-    max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
-    num_writers: int = Field(default=4, description="Number of concurrent background writers")
-
-
-class ResponsesStoreConfig(BaseModel):
-    sql_store_config: SqlStoreConfig
-    max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store")
-    num_writers: int = Field(default=4, description="Number of concurrent background writers")
-
-
 class StackRunConfig(BaseModel):
     version: int = LLAMA_STACK_RUN_CONFIG_VERSION
 
@@ -476,26 +467,8 @@ One or more providers to use for each API. The same provider_type (e.g., meta-re
can be instantiated multiple times (with different configs) if necessary.
""", ) - metadata_store: KVStoreConfig | None = Field( - default=None, - description=""" -Configuration for the persistence store used by the distribution registry. If not specified, -a default SQLite store will be used.""", - ) - - inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field( - default=None, - description=""" -Configuration for the persistence store used by the inference API. Can be either a -InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated). -If not specified, a default SQLite store will be used.""", - ) - - conversations_store: SqlStoreConfig | None = Field( - default=None, - description=""" -Configuration for the persistence store used by the conversations API. -If not specified, a default SQLite store will be used.""", + storage: StorageConfig = Field( + description="Catalog of named storage backends and references available to the stack", ) # registry of "resources" in the distribution @@ -535,6 +508,49 @@ If not specified, a default SQLite store will be used.""", return Path(v) return v + @model_validator(mode="after") + def validate_server_stores(self) -> "StackRunConfig": + backend_map = self.storage.backends + stores = self.storage.stores + kv_backends = { + name + for name, cfg in backend_map.items() + if cfg.type + in { + StorageBackendType.KV_REDIS, + StorageBackendType.KV_SQLITE, + StorageBackendType.KV_POSTGRES, + StorageBackendType.KV_MONGODB, + } + } + sql_backends = { + name + for name, cfg in backend_map.items() + if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES} + } + + def _ensure_backend(reference, expected_set, store_name: str) -> None: + if reference is None: + return + backend_name = reference.backend + if backend_name not in backend_map: + raise ValueError( + f"{store_name} references unknown backend '{backend_name}'. " + f"Available backends: {sorted(backend_map)}" + ) + if backend_name not in expected_set: + raise ValueError( + f"{store_name} references backend '{backend_name}' of type " + f"'{backend_map[backend_name].type.value}', but a backend of type " + f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required." 
+ ) + + _ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata") + _ensure_backend(stores.inference, sql_backends, "storage.stores.inference") + _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations") + _ensure_backend(stores.responses, sql_backends, "storage.stores.responses") + return self + class BuildConfig(BaseModel): version: int = LLAMA_STACK_BUILD_CONFIG_VERSION diff --git a/llama_stack/core/prompts/prompts.py b/llama_stack/core/prompts/prompts.py index 26e8f5cef..856397ca5 100644 --- a/llama_stack/core/prompts/prompts.py +++ b/llama_stack/core/prompts/prompts.py @@ -11,9 +11,8 @@ from pydantic import BaseModel from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts from llama_stack.core.datatypes import StackRunConfig -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig class PromptServiceConfig(BaseModel): @@ -41,10 +40,12 @@ class PromptServiceImpl(Prompts): self.kvstore: KVStore async def initialize(self) -> None: - kvstore_config = SqliteKVStoreConfig( - db_path=(DISTRIBS_BASE_DIR / self.config.run_config.image_name / "prompts.db").as_posix() - ) - self.kvstore = await kvstore_impl(kvstore_config) + # Use metadata store backend with prompts-specific namespace + metadata_ref = self.config.run_config.storage.stores.metadata + if not metadata_ref: + raise ValueError("storage.stores.metadata must be configured in run config") + prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend) + self.kvstore = await kvstore_impl(prompts_ref) def _get_default_key(self, prompt_id: str) -> str: """Get the KVStore key that stores the default version number.""" diff --git a/llama_stack/core/routers/__init__.py b/llama_stack/core/routers/__init__.py index 4463d2460..0573fc2c7 100644 --- a/llama_stack/core/routers/__init__.py +++ b/llama_stack/core/routers/__init__.py @@ -6,7 +6,10 @@ from typing import Any -from llama_stack.core.datatypes import AccessRule, RoutedProtocol +from llama_stack.core.datatypes import ( + AccessRule, + RoutedProtocol, +) from llama_stack.core.stack import StackRunConfig from llama_stack.core.store import DistributionRegistry from llama_stack.providers.datatypes import Api, RoutingTable @@ -76,9 +79,13 @@ async def get_auto_router_impl( api_to_dep_impl[dep_name] = deps[dep_api] # TODO: move pass configs to routers instead - if api == Api.inference and run_config.inference_store: + if api == Api.inference: + inference_ref = run_config.storage.stores.inference + if not inference_ref: + raise ValueError("storage.stores.inference must be configured in run config") + inference_store = InferenceStore( - config=run_config.inference_store, + reference=inference_ref, policy=policy, ) await inference_store.initialize() diff --git a/llama_stack/core/server/quota.py b/llama_stack/core/server/quota.py index 693f224c3..689f0e4c3 100644 --- a/llama_stack/core/server/quota.py +++ b/llama_stack/core/server/quota.py @@ -10,10 +10,10 @@ from datetime import UTC, datetime, timedelta from starlette.types import ASGIApp, Receive, Scope, Send +from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.kvstore.config import 
KVStoreConfig, SqliteKVStoreConfig -from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl +from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl logger = get_logger(name=__name__, category="core::server") @@ -33,7 +33,7 @@ class QuotaMiddleware: def __init__( self, app: ASGIApp, - kv_config: KVStoreConfig, + kv_config: KVStoreReference, anonymous_max_requests: int, authenticated_max_requests: int, window_seconds: int = 86400, @@ -45,15 +45,15 @@ class QuotaMiddleware: self.authenticated_max_requests = authenticated_max_requests self.window_seconds = window_seconds - if isinstance(self.kv_config, SqliteKVStoreConfig): - logger.warning( - "QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. " - f"window_seconds={self.window_seconds}" - ) - async def _get_kv(self) -> KVStore: if self.kv is None: self.kv = await kvstore_impl(self.kv_config) + backend_config = _KVSTORE_BACKENDS.get(self.kv_config.backend) + if backend_config and backend_config.type == StorageBackendType.KV_SQLITE: + logger.warning( + "QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. " + f"window_seconds={self.window_seconds}" + ) return self.kv async def __call__(self, scope: Scope, receive: Receive, send: Send): diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index 733b55262..15d0198b1 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -42,6 +42,16 @@ from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceI from llama_stack.core.providers import ProviderImpl, ProviderImplConfig from llama_stack.core.resolver import ProviderRegistry, resolve_impls from llama_stack.core.routing_tables.common import CommonRoutingTableImpl +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageBackendConfig, + StorageConfig, +) from llama_stack.core.store.registry import create_dist_registry from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger @@ -329,6 +339,25 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf impls[Api.conversations] = conversations_impl +def _initialize_storage(run_config: StackRunConfig): + kv_backends: dict[str, StorageBackendConfig] = {} + sql_backends: dict[str, StorageBackendConfig] = {} + for backend_name, backend_config in run_config.storage.backends.items(): + type = backend_config.type.value + if type.startswith("kv_"): + kv_backends[backend_name] = backend_config + elif type.startswith("sql_"): + sql_backends[backend_name] = backend_config + else: + raise ValueError(f"Unknown storage backend type: {type}") + + from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends + from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + register_kvstore_backends(kv_backends) + register_sqlstore_backends(sql_backends) + + class Stack: def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None): self.run_config = run_config @@ -347,7 +376,11 @@ class Stack: TEST_RECORDING_CONTEXT.__enter__() logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}") - dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name) + _initialize_storage(self.run_config) + 
stores = self.run_config.storage.stores + if not stores.metadata: + raise ValueError("storage.stores.metadata must be configured with a kv_* backend") + dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name) policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else [] internal_impls = {} @@ -488,5 +521,16 @@ def run_config_from_adhoc_config_spec( image_name="distro-test", apis=list(provider_configs_by_api.keys()), providers=provider_configs_by_api, + storage=StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=f"{distro_dir}/kvstore.db"), + "sql_default": SqliteSqlStoreConfig(db_path=f"{distro_dir}/sql_store.db"), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"), + ), + ), ) return config diff --git a/llama_stack/core/storage/__init__.py b/llama_stack/core/storage/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/core/storage/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/core/storage/datatypes.py b/llama_stack/core/storage/datatypes.py new file mode 100644 index 000000000..9df170e10 --- /dev/null +++ b/llama_stack/core/storage/datatypes.py @@ -0,0 +1,283 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
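The module below centralizes every storage backend shape behind a single tagged union keyed on `type`. A minimal sketch of what that buys, assuming pydantic v2's `TypeAdapter` (the snippet is illustrative and not part of the patch; the path is a placeholder):

    from pydantic import TypeAdapter

    from llama_stack.core.storage.datatypes import StorageBackendConfig

    # A raw mapping, e.g. parsed from run.yaml, selects the right model via the
    # "type" discriminator defined on each backend config class below.
    backend = TypeAdapter(StorageBackendConfig).validate_python(
        {"type": "kv_sqlite", "db_path": "~/.llama/distributions/demo/kvstore.db"}
    )
    print(type(backend).__name__)  # SqliteKVStoreConfig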
+
+import re
+from abc import abstractmethod
+from enum import StrEnum
+from pathlib import Path
+from typing import Annotated, Literal
+
+from pydantic import BaseModel, Field, field_validator
+
+
+class StorageBackendType(StrEnum):
+    KV_REDIS = "kv_redis"
+    KV_SQLITE = "kv_sqlite"
+    KV_POSTGRES = "kv_postgres"
+    KV_MONGODB = "kv_mongodb"
+    SQL_SQLITE = "sql_sqlite"
+    SQL_POSTGRES = "sql_postgres"
+
+
+class CommonConfig(BaseModel):
+    namespace: str | None = Field(
+        default=None,
+        description="All keys will be prefixed with this namespace",
+    )
+
+
+class RedisKVStoreConfig(CommonConfig):
+    type: Literal[StorageBackendType.KV_REDIS] = StorageBackendType.KV_REDIS
+    host: str = "localhost"
+    port: int = 6379
+
+    @property
+    def url(self) -> str:
+        return f"redis://{self.host}:{self.port}"
+
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return ["redis"]
+
+    @classmethod
+    def sample_run_config(cls):
+        return {
+            "type": StorageBackendType.KV_REDIS.value,
+            "host": "${env.REDIS_HOST:=localhost}",
+            "port": "${env.REDIS_PORT:=6379}",
+        }
+
+
+class SqliteKVStoreConfig(CommonConfig):
+    type: Literal[StorageBackendType.KV_SQLITE] = StorageBackendType.KV_SQLITE
+    db_path: str = Field(
+        description="File path for the sqlite database",
+    )
+
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return ["aiosqlite"]
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"):
+        return {
+            "type": StorageBackendType.KV_SQLITE.value,
+            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
+        }
+
+
+class PostgresKVStoreConfig(CommonConfig):
+    type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES
+    host: str = "localhost"
+    port: int | str = 5432
+    db: str = "llamastack"
+    user: str
+    password: str | None = None
+    ssl_mode: str | None = None
+    ca_cert_path: str | None = None
+    table_name: str = "llamastack_kvstore"
+
+    @classmethod
+    def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
+        return {
+            "type": StorageBackendType.KV_POSTGRES.value,
+            "host": "${env.POSTGRES_HOST:=localhost}",
+            "port": "${env.POSTGRES_PORT:=5432}",
+            "db": "${env.POSTGRES_DB:=llamastack}",
+            "user": "${env.POSTGRES_USER:=llamastack}",
+            "password": "${env.POSTGRES_PASSWORD:=llamastack}",
+            "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}",
+        }
+
+    @field_validator("table_name")
+    @classmethod
+    def validate_table_name(cls, v: str) -> str:
+        # PostgreSQL identifier rules:
+        # - Must start with a letter or underscore
+        # - Can contain letters, numbers, and underscores
+        # - Maximum length is 63 bytes
+        pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$"
+        if not re.match(pattern, v):
+            raise ValueError(
+                "Invalid table name. Must start with a letter or underscore and contain only letters, numbers, and underscores"
+            )
+        if len(v) > 63:
+            raise ValueError("Table name must be at most 63 characters")
+        return v
+
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return ["psycopg2-binary"]
+
+
+class MongoDBKVStoreConfig(CommonConfig):
+    type: Literal[StorageBackendType.KV_MONGODB] = StorageBackendType.KV_MONGODB
+    host: str = "localhost"
+    port: int = 27017
+    db: str = "llamastack"
+    user: str | None = None
+    password: str | None = None
+    collection_name: str = "llamastack_kvstore"
+
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return ["pymongo"]
+
+    @classmethod
+    def sample_run_config(cls, collection_name: str = "llamastack_kvstore"):
+        return {
+            "type": StorageBackendType.KV_MONGODB.value,
+            "host": "${env.MONGODB_HOST:=localhost}",
+            "port": "${env.MONGODB_PORT:=27017}",
+            "db": "${env.MONGODB_DB}",
+            "user": "${env.MONGODB_USER}",
+            "password": "${env.MONGODB_PASSWORD}",
+            "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}",
+        }
+
+
+class SqlAlchemySqlStoreConfig(BaseModel):
+    @property
+    @abstractmethod
+    def engine_str(self) -> str: ...
+
+    # TODO: move this when we have a better way to specify dependencies with internal APIs
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return ["sqlalchemy[asyncio]"]
+
+
+class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
+    type: Literal[StorageBackendType.SQL_SQLITE] = StorageBackendType.SQL_SQLITE
+    db_path: str = Field(
+        description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db",
+    )
+
+    @property
+    def engine_str(self) -> str:
+        return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix()
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
+        return {
+            "type": StorageBackendType.SQL_SQLITE.value,
+            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
+        }
+
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return super().pip_packages() + ["aiosqlite"]
+
+
+class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
+    type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES
+    host: str = "localhost"
+    port: int | str = 5432
+    db: str = "llamastack"
+    user: str
+    password: str | None = None
+
+    @property
+    def engine_str(self) -> str:
+        return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}"
+
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return super().pip_packages() + ["asyncpg"]
+
+    @classmethod
+    def sample_run_config(cls, **kwargs):
+        return {
+            "type": StorageBackendType.SQL_POSTGRES.value,
+            "host": "${env.POSTGRES_HOST:=localhost}",
+            "port": "${env.POSTGRES_PORT:=5432}",
+            "db": "${env.POSTGRES_DB:=llamastack}",
+            "user": "${env.POSTGRES_USER:=llamastack}",
+            "password": "${env.POSTGRES_PASSWORD:=llamastack}",
+        }
+
+
+# reference = (backend_name, table_name)
+class SqlStoreReference(BaseModel):
+    """A reference to a 'SQL-like' persistent store. A table name must be provided."""
+
+    table_name: str = Field(
+        description="Name of the table to use for the SqlStore",
+    )
+
+    backend: str = Field(
+        description="Name of backend from storage.backends",
+    )
+
+
+# reference = (backend_name, namespace)
+class KVStoreReference(BaseModel):
+    """A reference to a 'key-value' persistent store. 
A namespace must be provided.""" + + namespace: str = Field( + description="Key prefix for KVStore backends", + ) + + backend: str = Field( + description="Name of backend from storage.backends", + ) + + +StorageBackendConfig = Annotated[ + RedisKVStoreConfig + | SqliteKVStoreConfig + | PostgresKVStoreConfig + | MongoDBKVStoreConfig + | SqliteSqlStoreConfig + | PostgresSqlStoreConfig, + Field(discriminator="type"), +] + + +class InferenceStoreReference(SqlStoreReference): + """Inference store configuration with queue tuning.""" + + max_write_queue_size: int = Field( + default=10000, + description="Max queued writes for inference store", + ) + num_writers: int = Field( + default=4, + description="Number of concurrent background writers", + ) + + +class ResponsesStoreReference(InferenceStoreReference): + """Responses store configuration with queue tuning.""" + + +class ServerStoresConfig(BaseModel): + metadata: KVStoreReference | None = Field( + default=None, + description="Metadata store configuration (uses KV backend)", + ) + inference: InferenceStoreReference | None = Field( + default=None, + description="Inference store configuration (uses SQL backend)", + ) + conversations: SqlStoreReference | None = Field( + default=None, + description="Conversations store configuration (uses SQL backend)", + ) + responses: ResponsesStoreReference | None = Field( + default=None, + description="Responses store configuration (uses SQL backend)", + ) + + +class StorageConfig(BaseModel): + backends: dict[str, StorageBackendConfig] = Field( + description="Named backend configurations (e.g., 'default', 'cache')", + ) + stores: ServerStoresConfig = Field( + default_factory=lambda: ServerStoresConfig(), + description="Named references to storage backends used by the stack core", + ) diff --git a/llama_stack/core/store/registry.py b/llama_stack/core/store/registry.py index 04581bab5..6ff9e575b 100644 --- a/llama_stack/core/store/registry.py +++ b/llama_stack/core/store/registry.py @@ -11,10 +11,9 @@ from typing import Protocol import pydantic from llama_stack.core.datatypes import RoutableObjectWithProvider -from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig logger = get_logger(__name__, category="core::registry") @@ -191,16 +190,10 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry): async def create_dist_registry( - metadata_store: KVStoreConfig | None, - image_name: str, + metadata_store: KVStoreReference, image_name: str ) -> tuple[CachedDiskDistributionRegistry, KVStore]: # instantiate kvstore for storing and retrieving distribution metadata - if metadata_store: - dist_kvstore = await kvstore_impl(metadata_store) - else: - dist_kvstore = await kvstore_impl( - SqliteKVStoreConfig(db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix()) - ) + dist_kvstore = await kvstore_impl(metadata_store) dist_registry = CachedDiskDistributionRegistry(dist_kvstore) await dist_registry.initialize() return dist_registry, dist_kvstore diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index a6a6b7c0d..f9e741474 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -93,30 +93,30 @@ providers: - provider_id: faiss 
provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default - provider_id: ${env.MILVUS_URL:+milvus} provider_type: inline::milvus config: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db + persistence: + namespace: vector_io::milvus + backend: kv_default - provider_id: ${env.CHROMADB_URL:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default - provider_id: ${env.PGVECTOR_DB:+pgvector} provider_type: remote::pgvector config: @@ -125,17 +125,17 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db + persistence: + namespace: vector_io::pgvector + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db + table_name: files_metadata + backend: sql_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -147,12 +147,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: torchtune-cpu provider_type: inline::torchtune-cpu @@ -163,21 +166,21 @@ providers: provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -207,17 +210,28 @@ providers: provider_type: inline::reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/batches.db 
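The YAML churn above, and in every distribution file that follows, is one mechanical migration: per-store SQLite paths collapse into the shared `kv_default`/`sql_default` backends, and each provider keeps only a `namespace` or `table_name` plus a `backend` reference. A hedged sketch of how such a reference resolves at runtime, using the helpers this patch introduces (the db path is a placeholder):

    from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference
    from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends, sqlstore_impl

    # Done once at stack startup from run_config.storage.backends.
    register_sqlstore_backends({"sql_default": SqliteSqlStoreConfig(db_path="/tmp/demo_sql_store.db")})
    # A provider-side reference only names the backend and its table.
    files_store = sqlstore_impl(SqlStoreReference(backend="sql_default", table_name="files_metadata"))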
-metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/conversations.db + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: - shield_id: llama-guard diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml index 5da3cf511..3130285b9 100644 --- a/llama_stack/distributions/dell/run-with-safety.yaml +++ b/llama_stack/distributions/dell/run-with-safety.yaml @@ -26,9 +26,9 @@ providers: provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -38,32 +38,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -86,15 +89,26 @@ providers: max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db + 
stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml index ac0fdc0fa..af1a96a21 100644 --- a/llama_stack/distributions/dell/run.yaml +++ b/llama_stack/distributions/dell/run.yaml @@ -22,9 +22,9 @@ providers: provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -34,32 +34,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -82,15 +85,26 @@ providers: max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml index 874c5050f..b43d1ff19 100644 --- a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml @@ -37,9 +37,9 @@ providers: - provider_id: faiss 
provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -49,32 +49,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -99,15 +102,26 @@ providers: provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/meta-reference-gpu/run.yaml b/llama_stack/distributions/meta-reference-gpu/run.yaml index 50553d2c7..59e2d8129 100644 --- a/llama_stack/distributions/meta-reference-gpu/run.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run.yaml @@ -27,9 +27,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -39,32 +39,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -89,15 +92,26 @@ providers: provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/nvidia/run-with-safety.yaml b/llama_stack/distributions/nvidia/run-with-safety.yaml index e0482f67d..e06787d0b 100644 --- a/llama_stack/distributions/nvidia/run-with-safety.yaml +++ b/llama_stack/distributions/nvidia/run-with-safety.yaml @@ -28,9 +28,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: nvidia provider_type: remote::nvidia @@ -41,12 +41,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: nvidia provider_type: remote::nvidia @@ -65,8 +68,8 @@ providers: provider_type: inline::localfs 
config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default - provider_id: nvidia provider_type: remote::nvidia config: @@ -86,17 +89,28 @@ providers: config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/nvidia/run.yaml b/llama_stack/distributions/nvidia/run.yaml index 950782eed..85e0743e4 100644 --- a/llama_stack/distributions/nvidia/run.yaml +++ b/llama_stack/distributions/nvidia/run.yaml @@ -23,9 +23,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: nvidia provider_type: remote::nvidia @@ -36,12 +36,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: nvidia provider_type: remote::nvidia @@ -75,17 +78,28 @@ providers: config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + 
max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: [] vector_dbs: [] diff --git a/llama_stack/distributions/open-benchmark/run.yaml b/llama_stack/distributions/open-benchmark/run.yaml index a738887b4..2c6936bfc 100644 --- a/llama_stack/distributions/open-benchmark/run.yaml +++ b/llama_stack/distributions/open-benchmark/run.yaml @@ -39,16 +39,16 @@ providers: provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec_registry.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: @@ -57,9 +57,9 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/pgvector_registry.db + persistence: + namespace: vector_io::pgvector + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -69,32 +69,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -119,15 +122,26 @@ providers: provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/conversations.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: gpt-4o diff --git a/llama_stack/distributions/postgres-demo/postgres_demo.py b/llama_stack/distributions/postgres-demo/postgres_demo.py index 1f3e88b3b..876370ef3 100644 --- a/llama_stack/distributions/postgres-demo/postgres_demo.py +++ b/llama_stack/distributions/postgres-demo/postgres_demo.py @@ -91,7 +91,6 @@ def get_distribution_template() -> DistributionTemplate: "embedding_dimension": 768, }, ) - postgres_config = PostgresSqlStoreConfig.sample_run_config() return DistributionTemplate( name=name, distro_type="self_hosted", @@ -105,22 +104,16 @@ def get_distribution_template() -> DistributionTemplate: provider_overrides={ "inference": inference_providers + [embedding_provider], "vector_io": vector_io_providers, - "agents": [ - Provider( - provider_id="meta-reference", - provider_type="inline::meta-reference", - config=dict( - persistence_store=postgres_config, - responses_store=postgres_config, - ), - ) - ], }, default_models=default_models + [embedding_model], default_tool_groups=default_tool_groups, default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], - metadata_store=PostgresKVStoreConfig.sample_run_config(), - inference_store=postgres_config, + storage_backends={ + "kv_default": PostgresKVStoreConfig.sample_run_config( + table_name="llamastack_kvstore", + ), + "sql_default": PostgresSqlStoreConfig.sample_run_config(), + }, ), }, run_config_env_vars={ diff --git a/llama_stack/distributions/postgres-demo/run.yaml b/llama_stack/distributions/postgres-demo/run.yaml index 62faf3f62..9556b1287 100644 --- a/llama_stack/distributions/postgres-demo/run.yaml +++ b/llama_stack/distributions/postgres-demo/run.yaml @@ -22,9 +22,9 @@ providers: provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -34,20 +34,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 tool_runtime: - provider_id: brave-search provider_type: remote::brave-search @@ -63,24 +58,35 @@ providers: provider_type: inline::rag-runtime - provider_id: model-context-protocol provider_type: remote::model-context-protocol -metadata_store: - type: postgres - host: 
${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/conversations.db +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml index 370d4b516..abfa579a7 100644 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ b/llama_stack/distributions/starter-gpu/run.yaml @@ -93,30 +93,30 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec_registry.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default - provider_id: ${env.MILVUS_URL:+milvus} provider_type: inline::milvus config: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/milvus_registry.db + persistence: + namespace: vector_io::milvus + backend: kv_default - provider_id: ${env.CHROMADB_URL:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default - provider_id: ${env.PGVECTOR_DB:+pgvector} provider_type: remote::pgvector config: @@ -125,17 +125,17 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/pgvector_registry.db + persistence: + namespace: vector_io::pgvector + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files} 
metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/files_metadata.db + table_name: files_metadata + backend: sql_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -147,12 +147,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: huggingface-gpu provider_type: inline::huggingface-gpu @@ -166,21 +169,21 @@ providers: provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -210,17 +213,28 @@ providers: provider_type: inline::reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/batches.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/conversations.db + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: - shield_id: llama-guard diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml index 2f4e7f350..fc58a4afe 100644 --- a/llama_stack/distributions/starter/run.yaml +++ b/llama_stack/distributions/starter/run.yaml @@ -93,30 +93,30 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db + 
persistence: + namespace: vector_io::sqlite_vec + backend: kv_default - provider_id: ${env.MILVUS_URL:+milvus} provider_type: inline::milvus config: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db + persistence: + namespace: vector_io::milvus + backend: kv_default - provider_id: ${env.CHROMADB_URL:+chromadb} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db + persistence: + namespace: vector_io::chroma_remote + backend: kv_default - provider_id: ${env.PGVECTOR_DB:+pgvector} provider_type: remote::pgvector config: @@ -125,17 +125,17 @@ providers: db: ${env.PGVECTOR_DB:=} user: ${env.PGVECTOR_USER:=} password: ${env.PGVECTOR_PASSWORD:=} - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db + persistence: + namespace: vector_io::pgvector + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + table_name: files_metadata + backend: sql_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -147,12 +147,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: torchtune-cpu provider_type: inline::torchtune-cpu @@ -163,21 +166,21 @@ providers: provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -207,17 +210,28 @@ providers: provider_type: inline::reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/batches.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/conversations.db + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: - shield_id: llama-guard diff --git a/llama_stack/distributions/template.py b/llama_stack/distributions/template.py index 807829999..542c7bea9 100644 --- a/llama_stack/distributions/template.py +++ b/llama_stack/distributions/template.py @@ -29,6 +29,12 @@ from llama_stack.core.datatypes import ( ToolGroupInput, ) from llama_stack.core.distribution import get_provider_registry +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + SqlStoreReference, + StorageBackendType, +) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry @@ -180,10 +186,9 @@ class RunConfigSettings(BaseModel): default_tool_groups: list[ToolGroupInput] | None = None default_datasets: list[DatasetInput] | None = None default_benchmarks: list[BenchmarkInput] | None = None - metadata_store: dict | None = None - inference_store: dict | None = None - conversations_store: dict | None = None telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True)) + storage_backends: dict[str, Any] | None = None + storage_stores: dict[str, Any] | None = None def run_config( self, @@ -226,6 +231,37 @@ class RunConfigSettings(BaseModel): # Get unique set of APIs from providers apis = sorted(providers.keys()) + storage_backends = self.storage_backends or { + "kv_default": SqliteKVStoreConfig.sample_run_config( + __distro_dir__=f"~/.llama/distributions/{name}", + db_name="kvstore.db", + ), + "sql_default": SqliteSqlStoreConfig.sample_run_config( + __distro_dir__=f"~/.llama/distributions/{name}", + db_name="sql_store.db", + ), + } + + storage_stores = self.storage_stores or { + "metadata": KVStoreReference( + backend="kv_default", + namespace="registry", + ).model_dump(exclude_none=True), + "inference": InferenceStoreReference( + backend="sql_default", + table_name="inference_store", + ).model_dump(exclude_none=True), + "conversations": SqlStoreReference( + backend="sql_default", + table_name="openai_conversations", + ).model_dump(exclude_none=True), + } + + storage_config = dict( + backends=storage_backends, + stores=storage_stores, + ) + # Return a dict that matches StackRunConfig structure return { "version": LLAMA_STACK_RUN_CONFIG_VERSION, @@ -233,21 +269,7 @@ class RunConfigSettings(BaseModel): "container_image": container_image, "apis": apis, "providers": provider_configs, - "metadata_store": self.metadata_store - or SqliteKVStoreConfig.sample_run_config( - __distro_dir__=f"~/.llama/distributions/{name}", - db_name="registry.db", - ), - "inference_store": self.inference_store - or SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=f"~/.llama/distributions/{name}", - db_name="inference_store.db", - ), - "conversations_store": self.conversations_store - or SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=f"~/.llama/distributions/{name}", - db_name="conversations.db", - ), + "storage": storage_config, "models": [m.model_dump(exclude_none=True) for 
m in (self.default_models or [])], "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])], "vector_dbs": [], @@ -297,11 +319,15 @@ class DistributionTemplate(BaseModel): # We should have a better way to do this by formalizing the concept of "internal" APIs # and providers, with a way to specify dependencies for them. - if run_config_.get("inference_store"): - additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"])) - - if run_config_.get("metadata_store"): - additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"])) + storage_cfg = run_config_.get("storage", {}) + for backend_cfg in storage_cfg.get("backends", {}).values(): + store_type = backend_cfg.get("type") + if not store_type: + continue + if str(store_type).startswith("kv_"): + additional_pip_packages.extend(get_kv_pip_packages(backend_cfg)) + elif str(store_type).startswith("sql_"): + additional_pip_packages.extend(get_sql_pip_packages(backend_cfg)) if self.additional_pip_packages: additional_pip_packages.extend(self.additional_pip_packages) @@ -387,11 +413,13 @@ class DistributionTemplate(BaseModel): def enum_representer(dumper, data): return dumper.represent_scalar("tag:yaml.org,2002:str", data.value) - # Register YAML representer for ModelType + # Register YAML representer for enums yaml.add_representer(ModelType, enum_representer) yaml.add_representer(DatasetPurpose, enum_representer) + yaml.add_representer(StorageBackendType, enum_representer) yaml.SafeDumper.add_representer(ModelType, enum_representer) yaml.SafeDumper.add_representer(DatasetPurpose, enum_representer) + yaml.SafeDumper.add_representer(StorageBackendType, enum_representer) for output_dir in [yaml_output_dir, doc_output_dir]: output_dir.mkdir(parents=True, exist_ok=True) diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml index c3db4eeb8..37866cb32 100644 --- a/llama_stack/distributions/watsonx/run.yaml +++ b/llama_stack/distributions/watsonx/run.yaml @@ -22,9 +22,9 @@ providers: - provider_id: faiss provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db + persistence: + namespace: vector_io::faiss + backend: kv_default safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -34,32 +34,35 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 eval: - provider_id: meta-reference provider_type: inline::meta-reference config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db + namespace: eval + backend: kv_default datasetio: - provider_id: huggingface provider_type: remote::huggingface config: kvstore: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db + namespace: datasetio::huggingface + backend: kv_default - provider_id: localfs provider_type: inline::localfs config: kvstore: - type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db + namespace: datasetio::localfs + backend: kv_default scoring: - provider_id: basic provider_type: inline::basic @@ -90,17 +93,28 @@ providers: config: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files} metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/files_metadata.db -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db -conversations_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/conversations.db + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: [] vector_dbs: [] diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 810c063e6..c2f6ea640 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -83,8 +83,8 @@ class MetaReferenceAgentsImpl(Agents): self.policy = policy async def initialize(self) -> None: - self.persistence_store = await kvstore_impl(self.config.persistence_store) - self.responses_store = ResponsesStore(self.config.responses_store, self.policy) + self.persistence_store = await kvstore_impl(self.config.persistence.agent_state) + self.responses_store = ResponsesStore(self.config.persistence.responses, self.policy) await self.responses_store.initialize() self.openai_responses_impl = OpenAIResponsesImpl( inference_api=self.inference_api, diff --git a/llama_stack/providers/inline/agents/meta_reference/config.py b/llama_stack/providers/inline/agents/meta_reference/config.py index 1c392f29c..a800b426b 100644 --- a/llama_stack/providers/inline/agents/meta_reference/config.py +++ b/llama_stack/providers/inline/agents/meta_reference/config.py @@ -8,24 +8,30 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore import KVStoreConfig -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference + + +class AgentPersistenceConfig(BaseModel): + """Nested persistence configuration for agents.""" + + agent_state: KVStoreReference + responses: ResponsesStoreReference class MetaReferenceAgentsImplConfig(BaseModel): - persistence_store: KVStoreConfig - responses_store: SqlStoreConfig + persistence: AgentPersistenceConfig @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "persistence_store": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="agents_store.db", - ), - "responses_store": SqliteSqlStoreConfig.sample_run_config( - 
__distro_dir__=__distro_dir__, - db_name="responses_store.db", - ), + "persistence": { + "agent_state": KVStoreReference( + backend="kv_default", + namespace="agents", + ).model_dump(exclude_none=True), + "responses": ResponsesStoreReference( + backend="sql_default", + table_name="responses", + ).model_dump(exclude_none=True), + } } diff --git a/llama_stack/providers/inline/batches/reference/config.py b/llama_stack/providers/inline/batches/reference/config.py index d8d06868b..f896a897d 100644 --- a/llama_stack/providers/inline/batches/reference/config.py +++ b/llama_stack/providers/inline/batches/reference/config.py @@ -6,13 +6,13 @@ from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference class ReferenceBatchesImplConfig(BaseModel): """Configuration for the Reference Batches implementation.""" - kvstore: KVStoreConfig = Field( + kvstore: KVStoreReference = Field( description="Configuration for the key-value store backend.", ) @@ -33,8 +33,8 @@ class ReferenceBatchesImplConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="batches.db", - ), + "kvstore": KVStoreReference( + backend="kv_default", + namespace="batches", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/datasetio/localfs/config.py b/llama_stack/providers/inline/datasetio/localfs/config.py index b450e8777..6e878df62 100644 --- a/llama_stack/providers/inline/datasetio/localfs/config.py +++ b/llama_stack/providers/inline/datasetio/localfs/config.py @@ -7,20 +7,17 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference class LocalFSDatasetIOConfig(BaseModel): - kvstore: KVStoreConfig + kvstore: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="localfs_datasetio.db", - ) + "kvstore": KVStoreReference( + backend="kv_default", + namespace="datasetio::localfs", + ).model_dump(exclude_none=True) } diff --git a/llama_stack/providers/inline/eval/meta_reference/config.py b/llama_stack/providers/inline/eval/meta_reference/config.py index 2a4a29998..b496c855e 100644 --- a/llama_stack/providers/inline/eval/meta_reference/config.py +++ b/llama_stack/providers/inline/eval/meta_reference/config.py @@ -7,20 +7,17 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference class MetaReferenceEvalConfig(BaseModel): - kvstore: KVStoreConfig + kvstore: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="meta_reference_eval.db", - ) + "kvstore": KVStoreReference( + backend="kv_default", + namespace="eval", + ).model_dump(exclude_none=True) } diff --git a/llama_stack/providers/inline/files/localfs/config.py b/llama_stack/providers/inline/files/localfs/config.py index 6c767af8f..0c2dd3b21 100644 --- 
a/llama_stack/providers/inline/files/localfs/config.py +++ b/llama_stack/providers/inline/files/localfs/config.py @@ -8,14 +8,14 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig +from llama_stack.core.storage.datatypes import SqlStoreReference class LocalfsFilesImplConfig(BaseModel): storage_dir: str = Field( description="Directory to store uploaded files", ) - metadata_store: SqlStoreConfig = Field( + metadata_store: SqlStoreReference = Field( description="SQL store configuration for file metadata", ) ttl_secs: int = 365 * 24 * 60 * 60 # 1 year @@ -24,8 +24,8 @@ class LocalfsFilesImplConfig(BaseModel): def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}", - "metadata_store": SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="files_metadata.db", - ), + "metadata_store": SqlStoreReference( + backend="sql_default", + table_name="files_metadata", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/chroma/config.py b/llama_stack/providers/inline/vector_io/chroma/config.py index a9566f7ff..1798f10de 100644 --- a/llama_stack/providers/inline/vector_io/chroma/config.py +++ b/llama_stack/providers/inline/vector_io/chroma/config.py @@ -8,14 +8,14 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class ChromaVectorIOConfig(BaseModel): db_path: str - kvstore: KVStoreConfig = Field(description="Config for KV store backend") + persistence: KVStoreReference = Field(description="Config for KV store backend") @classmethod def sample_run_config( @@ -23,8 +23,8 @@ class ChromaVectorIOConfig(BaseModel): ) -> dict[str, Any]: return { "db_path": db_path, - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="chroma_inline_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::chroma", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/faiss/config.py b/llama_stack/providers/inline/vector_io/faiss/config.py index cbcbb1762..dd7a7aeca 100644 --- a/llama_stack/providers/inline/vector_io/faiss/config.py +++ b/llama_stack/providers/inline/vector_io/faiss/config.py @@ -8,22 +8,19 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class FaissVectorIOConfig(BaseModel): - kvstore: KVStoreConfig + persistence: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="faiss_store.db", - ) + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::faiss", + ).model_dump(exclude_none=True) } diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index df0864db8..ff1a6aa4c 100644 --- 
a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -214,7 +214,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr self.cache: dict[str, VectorDBWithIndex] = {} async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) # Load existing banks from kvstore start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" diff --git a/llama_stack/providers/inline/vector_io/milvus/config.py b/llama_stack/providers/inline/vector_io/milvus/config.py index 8cbd056be..b333b04ea 100644 --- a/llama_stack/providers/inline/vector_io/milvus/config.py +++ b/llama_stack/providers/inline/vector_io/milvus/config.py @@ -8,25 +8,22 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class MilvusVectorIOConfig(BaseModel): db_path: str - kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)") consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "db_path": "${env.MILVUS_DB_PATH:=" + __distro_dir__ + "}/" + "milvus.db", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="milvus_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::milvus", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py index e15c27ea1..e7ecde7b7 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/config.py +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -9,23 +9,21 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class QdrantVectorIOConfig(BaseModel): path: str - kvstore: KVStoreConfig + persistence: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, db_name="qdrant_registry.db" - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::qdrant", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py index 525ed4b1f..596f8fc95 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py @@ -8,22 +8,19 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes 
import KVStoreReference class SQLiteVectorIOConfig(BaseModel): db_path: str = Field(description="Path to the SQLite database file") - kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)") @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="sqlite_vec_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::sqlite_vec", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 8bc3b04cb..a58aa05b8 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -425,7 +425,7 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc self.vector_db_store = None async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" diff --git a/llama_stack/providers/remote/datasetio/huggingface/config.py b/llama_stack/providers/remote/datasetio/huggingface/config.py index 38f933728..35297cb58 100644 --- a/llama_stack/providers/remote/datasetio/huggingface/config.py +++ b/llama_stack/providers/remote/datasetio/huggingface/config.py @@ -7,20 +7,17 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference class HuggingfaceDatasetIOConfig(BaseModel): - kvstore: KVStoreConfig + kvstore: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="huggingface_datasetio.db", - ) + "kvstore": KVStoreReference( + backend="kv_default", + namespace="datasetio::huggingface", + ).model_dump(exclude_none=True) } diff --git a/llama_stack/providers/remote/files/s3/config.py b/llama_stack/providers/remote/files/s3/config.py index da20d8668..cd4b1adda 100644 --- a/llama_stack/providers/remote/files/s3/config.py +++ b/llama_stack/providers/remote/files/s3/config.py @@ -8,7 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig +from llama_stack.core.storage.datatypes import SqlStoreReference class S3FilesImplConfig(BaseModel): @@ -24,7 +24,7 @@ class S3FilesImplConfig(BaseModel): auto_create_bucket: bool = Field( default=False, description="Automatically create the S3 bucket if it doesn't exist" ) - metadata_store: SqlStoreConfig = Field(description="SQL store configuration for file metadata") + metadata_store: SqlStoreReference = Field(description="SQL store configuration for file metadata") @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: @@ -35,8 +35,8 @@ class S3FilesImplConfig(BaseModel): "aws_secret_access_key": "${env.AWS_SECRET_ACCESS_KEY:=}", "endpoint_url": 
"${env.S3_ENDPOINT_URL:=}", "auto_create_bucket": "${env.S3_AUTO_CREATE_BUCKET:=false}", - "metadata_store": SqliteSqlStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="s3_files_metadata.db", - ), + "metadata_store": SqlStoreReference( + backend="sql_default", + table_name="s3_files_metadata", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 5792a83c6..b07207cc6 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -151,7 +151,7 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.vector_db_store = None async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) self.vector_db_store = self.kvstore if isinstance(self.config, RemoteChromaVectorIOConfig): diff --git a/llama_stack/providers/remote/vector_io/chroma/config.py b/llama_stack/providers/remote/vector_io/chroma/config.py index a1193905a..209ba90bb 100644 --- a/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/llama_stack/providers/remote/vector_io/chroma/config.py @@ -8,21 +8,21 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @json_schema_type class ChromaVectorIOConfig(BaseModel): url: str | None - kvstore: KVStoreConfig = Field(description="Config for KV store backend") + persistence: KVStoreReference = Field(description="Config for KV store backend") @classmethod def sample_run_config(cls, __distro_dir__: str, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]: return { "url": url, - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="chroma_remote_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::chroma_remote", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/milvus/config.py b/llama_stack/providers/remote/vector_io/milvus/config.py index 899d3678d..8ff9e1328 100644 --- a/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/llama_stack/providers/remote/vector_io/milvus/config.py @@ -8,7 +8,7 @@ from typing import Any from pydantic import BaseModel, ConfigDict, Field -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @@ -17,7 +17,7 @@ class MilvusVectorIOConfig(BaseModel): uri: str = Field(description="The URI of the Milvus server") token: str | None = Field(description="The token of the Milvus server") consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") - kvstore: KVStoreConfig = Field(description="Config for KV store backend") + persistence: KVStoreReference = Field(description="Config for KV store backend") # This configuration allows additional fields to be passed through to the underlying Milvus client. # See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. 
@@ -28,8 +28,8 @@ class MilvusVectorIOConfig(BaseModel): return { "uri": "${env.MILVUS_ENDPOINT}", "token": "${env.MILVUS_TOKEN}", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="milvus_remote_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::milvus_remote", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index d7147a7f0..1f689d1a9 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -321,7 +321,7 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.metadata_collection_name = "openai_vector_stores_metadata" async def initialize(self) -> None: - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py index 334cbe5be..d81e524e4 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -8,10 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @@ -22,7 +19,9 @@ class PGVectorVectorIOConfig(BaseModel): db: str | None = Field(default="postgres") user: str | None = Field(default="postgres") password: str | None = Field(default="mysecretpassword") - kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None) + persistence: KVStoreReference | None = Field( + description="Config for KV store backend (SQLite only for now)", default=None + ) @classmethod def sample_run_config( @@ -41,8 +40,8 @@ class PGVectorVectorIOConfig(BaseModel): "db": db, "user": user, "password": password, - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="pgvector_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::pgvector", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index d55c13103..691cf965c 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -358,7 +358,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco async def initialize(self) -> None: log.info(f"Initializing PGVector memory adapter with config: {self.config}") - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) await self.initialize_openai_vector_stores() try: diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py index ff5506236..01fbcc5cb 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/llama_stack/providers/remote/vector_io/qdrant/config.py @@ 
-8,10 +8,7 @@ from typing import Any from pydantic import BaseModel -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @@ -27,14 +24,14 @@ class QdrantVectorIOConfig(BaseModel): prefix: str | None = None timeout: int | None = None host: str | None = None - kvstore: KVStoreConfig + persistence: KVStoreReference @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "api_key": "${env.QDRANT_API_KEY:=}", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="qdrant_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::qdrant_remote", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 8b90935cd..eba8333e4 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -174,9 +174,9 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self._qdrant_lock = asyncio.Lock() async def initialize(self) -> None: - client_config = self.config.model_dump(exclude_none=True, exclude={"kvstore"}) + client_config = self.config.model_dump(exclude_none=True, exclude={"persistence"}) self.client = AsyncQdrantClient(**client_config) - self.kvstore = await kvstore_impl(self.config.kvstore) + self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" diff --git a/llama_stack/providers/remote/vector_io/weaviate/config.py b/llama_stack/providers/remote/vector_io/weaviate/config.py index b693e294e..06242c6b4 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -8,10 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.providers.utils.kvstore.config import ( - KVStoreConfig, - SqliteKVStoreConfig, -) +from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.schema_utils import json_schema_type @@ -19,7 +16,9 @@ from llama_stack.schema_utils import json_schema_type class WeaviateVectorIOConfig(BaseModel): weaviate_api_key: str | None = Field(description="The API key for the Weaviate instance", default=None) weaviate_cluster_url: str | None = Field(description="The URL of the Weaviate cluster", default="localhost:8080") - kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None) + persistence: KVStoreReference | None = Field( + description="Config for KV store backend (SQLite only for now)", default=None + ) @classmethod def sample_run_config( @@ -30,8 +29,8 @@ class WeaviateVectorIOConfig(BaseModel): return { "weaviate_api_key": None, "weaviate_cluster_url": "${env.WEAVIATE_CLUSTER_URL:=localhost:8080}", - "kvstore": SqliteKVStoreConfig.sample_run_config( - __distro_dir__=__distro_dir__, - db_name="weaviate_registry.db", - ), + "persistence": KVStoreReference( + backend="kv_default", + namespace="vector_io::weaviate", + ).model_dump(exclude_none=True), } diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index d8b11c441..06ffc8706 100644 --- 
a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -320,8 +320,8 @@ class WeaviateVectorIOAdapter( async def initialize(self) -> None: """Set up KV store and load existing vector DBs and OpenAI vector stores.""" # Initialize KV store for metadata if configured - if self.config.kvstore is not None: - self.kvstore = await kvstore_impl(self.config.kvstore) + if self.config.persistence is not None: + self.kvstore = await kvstore_impl(self.config.persistence) else: self.kvstore = None log.info("No kvstore configured, registry will not persist across restarts") diff --git a/llama_stack/providers/utils/inference/inference_store.py b/llama_stack/providers/utils/inference/inference_store.py index 901f77c67..8e20bca6b 100644 --- a/llama_stack/providers/utils/inference/inference_store.py +++ b/llama_stack/providers/utils/inference/inference_store.py @@ -15,12 +15,13 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, Order, ) -from llama_stack.core.datatypes import AccessRule, InferenceStoreConfig +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqlStoreConfig, SqlStoreType, sqlstore_impl +from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl logger = get_logger(name=__name__, category="inference") @@ -28,33 +29,32 @@ logger = get_logger(name=__name__, category="inference") class InferenceStore: def __init__( self, - config: InferenceStoreConfig | SqlStoreConfig, + reference: InferenceStoreReference, policy: list[AccessRule], ): - # Handle backward compatibility - if not isinstance(config, InferenceStoreConfig): - # Legacy: SqlStoreConfig passed directly as config - config = InferenceStoreConfig( - sql_store_config=config, - ) - - self.config = config - self.sql_store_config = config.sql_store_config + self.reference = reference self.sql_store = None self.policy = policy - # Disable write queue for SQLite to avoid concurrency issues - self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite - # Async write queue and worker control self._queue: asyncio.Queue[tuple[OpenAIChatCompletion, list[OpenAIMessageParam]]] | None = None self._worker_tasks: list[asyncio.Task[Any]] = [] - self._max_write_queue_size: int = config.max_write_queue_size - self._num_writers: int = max(1, config.num_writers) + self._max_write_queue_size: int = reference.max_write_queue_size + self._num_writers: int = max(1, reference.num_writers) async def initialize(self): """Create the necessary tables if they don't exist.""" - self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config), self.policy) + base_store = sqlstore_impl(self.reference) + self.sql_store = AuthorizedSqlStore(base_store, self.policy) + + # Disable write queue for SQLite to avoid concurrency issues + backend_name = self.reference.backend + backend_config = _SQLSTORE_BACKENDS.get(backend_name) + if backend_config is None: + raise ValueError( + f"Unregistered SQL backend '{backend_name}'. 
Registered backends: {sorted(_SQLSTORE_BACKENDS)}" + ) + self.enable_write_queue = backend_config.type != StorageBackendType.SQL_SQLITE await self.sql_store.create_table( "chat_completions", { diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index 7b6a79350..c0582abc4 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -4,143 +4,20 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import re -from enum import Enum -from typing import Annotated, Literal +from typing import Annotated -from pydantic import BaseModel, Field, field_validator - -from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR - - -class KVStoreType(Enum): - redis = "redis" - sqlite = "sqlite" - postgres = "postgres" - mongodb = "mongodb" - - -class CommonConfig(BaseModel): - namespace: str | None = Field( - default=None, - description="All keys will be prefixed with this namespace", - ) - - -class RedisKVStoreConfig(CommonConfig): - type: Literal["redis"] = KVStoreType.redis.value - host: str = "localhost" - port: int = 6379 - - @property - def url(self) -> str: - return f"redis://{self.host}:{self.port}" - - @classmethod - def pip_packages(cls) -> list[str]: - return ["redis"] - - @classmethod - def sample_run_config(cls): - return { - "type": "redis", - "host": "${env.REDIS_HOST:=localhost}", - "port": "${env.REDIS_PORT:=6379}", - } - - -class SqliteKVStoreConfig(CommonConfig): - type: Literal["sqlite"] = KVStoreType.sqlite.value - db_path: str = Field( - default=(RUNTIME_BASE_DIR / "kvstore.db").as_posix(), - description="File path for the sqlite database", - ) - - @classmethod - def pip_packages(cls) -> list[str]: - return ["aiosqlite"] - - @classmethod - def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"): - return { - "type": "sqlite", - "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, - } - - -class PostgresKVStoreConfig(CommonConfig): - type: Literal["postgres"] = KVStoreType.postgres.value - host: str = "localhost" - port: int = 5432 - db: str = "llamastack" - user: str - password: str | None = None - ssl_mode: str | None = None - ca_cert_path: str | None = None - table_name: str = "llamastack_kvstore" - - @classmethod - def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs): - return { - "type": "postgres", - "host": "${env.POSTGRES_HOST:=localhost}", - "port": "${env.POSTGRES_PORT:=5432}", - "db": "${env.POSTGRES_DB:=llamastack}", - "user": "${env.POSTGRES_USER:=llamastack}", - "password": "${env.POSTGRES_PASSWORD:=llamastack}", - "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}", - } - - @classmethod - @field_validator("table_name") - def validate_table_name(cls, v: str) -> str: - # PostgreSQL identifiers rules: - # - Must start with a letter or underscore - # - Can contain letters, numbers, and underscores - # - Maximum length is 63 bytes - pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$" - if not re.match(pattern, v): - raise ValueError( - "Invalid table name. 
Must start with letter or underscore and contain only letters, numbers, and underscores" - ) - if len(v) > 63: - raise ValueError("Table name must be less than 63 characters") - return v - - @classmethod - def pip_packages(cls) -> list[str]: - return ["psycopg2-binary"] - - -class MongoDBKVStoreConfig(CommonConfig): - type: Literal["mongodb"] = KVStoreType.mongodb.value - host: str = "localhost" - port: int = 27017 - db: str = "llamastack" - user: str | None = None - password: str | None = None - collection_name: str = "llamastack_kvstore" - - @classmethod - def pip_packages(cls) -> list[str]: - return ["pymongo"] - - @classmethod - def sample_run_config(cls, collection_name: str = "llamastack_kvstore"): - return { - "type": "mongodb", - "host": "${env.MONGODB_HOST:=localhost}", - "port": "${env.MONGODB_PORT:=5432}", - "db": "${env.MONGODB_DB}", - "user": "${env.MONGODB_USER}", - "password": "${env.MONGODB_PASSWORD}", - "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}", - } +from pydantic import Field +from llama_stack.core.storage.datatypes import ( + MongoDBKVStoreConfig, + PostgresKVStoreConfig, + RedisKVStoreConfig, + SqliteKVStoreConfig, + StorageBackendType, +) KVStoreConfig = Annotated[ - RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig, - Field(discriminator="type", default=KVStoreType.sqlite.value), + RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig, Field(discriminator="type") ] @@ -148,13 +25,13 @@ def get_pip_packages(store_config: dict | KVStoreConfig) -> list[str]: """Get pip packages for KV store config, handling both dict and object cases.""" if isinstance(store_config, dict): store_type = store_config.get("type") - if store_type == "sqlite": + if store_type == StorageBackendType.KV_SQLITE.value: return SqliteKVStoreConfig.pip_packages() - elif store_type == "postgres": + elif store_type == StorageBackendType.KV_POSTGRES.value: return PostgresKVStoreConfig.pip_packages() - elif store_type == "redis": + elif store_type == StorageBackendType.KV_REDIS.value: return RedisKVStoreConfig.pip_packages() - elif store_type == "mongodb": + elif store_type == StorageBackendType.KV_MONGODB.value: return MongoDBKVStoreConfig.pip_packages() else: raise ValueError(f"Unknown KV store type: {store_type}") diff --git a/llama_stack/providers/utils/kvstore/kvstore.py b/llama_stack/providers/utils/kvstore/kvstore.py index 426523d8e..eee51e5d9 100644 --- a/llama_stack/providers/utils/kvstore/kvstore.py +++ b/llama_stack/providers/utils/kvstore/kvstore.py @@ -4,9 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree.
+ +from __future__ import annotations + +from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType from .api import KVStore -from .config import KVStoreConfig, KVStoreType +from .config import KVStoreConfig def kvstore_dependencies(): @@ -44,20 +52,41 @@ class InmemoryKVStoreImpl(KVStore): del self._store[key] -async def kvstore_impl(config: KVStoreConfig) -> KVStore: - if config.type == KVStoreType.redis.value: +_KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {} + + +def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None: + """Register the set of available KV store backends for reference resolution.""" + global _KVSTORE_BACKENDS + + _KVSTORE_BACKENDS.clear() + for name, cfg in backends.items(): + _KVSTORE_BACKENDS[name] = cfg + + +async def kvstore_impl(reference: KVStoreReference) -> KVStore: + backend_name = reference.backend + + backend_config = _KVSTORE_BACKENDS.get(backend_name) + if backend_config is None: + raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}") + + config = backend_config.model_copy() + config.namespace = reference.namespace + + if config.type == StorageBackendType.KV_REDIS.value: from .redis import RedisKVStoreImpl impl = RedisKVStoreImpl(config) - elif config.type == KVStoreType.sqlite.value: + elif config.type == StorageBackendType.KV_SQLITE.value: from .sqlite import SqliteKVStoreImpl impl = SqliteKVStoreImpl(config) - elif config.type == KVStoreType.postgres.value: + elif config.type == StorageBackendType.KV_POSTGRES.value: from .postgres import PostgresKVStoreImpl impl = PostgresKVStoreImpl(config) - elif config.type == KVStoreType.mongodb.value: + elif config.type == StorageBackendType.KV_MONGODB.value: from .mongodb import MongoDBKVStoreImpl impl = MongoDBKVStoreImpl(config) diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index 36370b492..d5c243252 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -18,13 +18,13 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObjectWithInput, ) from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.core.datatypes import AccessRule, ResponsesStoreConfig -from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, SqlStoreType, sqlstore_impl +from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl logger = get_logger(name=__name__, category="openai_responses") @@ -45,39 +45,38 @@ class _OpenAIResponseObjectWithInputAndMessages(OpenAIResponseObjectWithInput): class ResponsesStore: def __init__( self, - config: ResponsesStoreConfig | SqlStoreConfig, + reference: ResponsesStoreReference | SqlStoreReference, policy: list[AccessRule], ): - # Handle backward compatibility - if not isinstance(config, ResponsesStoreConfig): - # Legacy: SqlStoreConfig passed directly as config - config = ResponsesStoreConfig( - sql_store_config=config, - ) + if isinstance(reference, ResponsesStoreReference): + 
self.reference = reference + else: + self.reference = ResponsesStoreReference(**reference.model_dump()) - self.config = config - self.sql_store_config = config.sql_store_config - if not self.sql_store_config: - self.sql_store_config = SqliteSqlStoreConfig( - db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), - ) - self.sql_store = None self.policy = policy - - # Disable write queue for SQLite to avoid concurrency issues - self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite + self.sql_store = None + self.enable_write_queue = True # Async write queue and worker control self._queue: ( asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput], list[OpenAIMessageParam]]] | None ) = None self._worker_tasks: list[asyncio.Task[Any]] = [] - self._max_write_queue_size: int = config.max_write_queue_size - self._num_writers: int = max(1, config.num_writers) + self._max_write_queue_size: int = self.reference.max_write_queue_size + self._num_writers: int = max(1, self.reference.num_writers) async def initialize(self): """Create the necessary tables if they don't exist.""" - self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config), self.policy) + base_store = sqlstore_impl(self.reference) + self.sql_store = AuthorizedSqlStore(base_store, self.policy) + + backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend) + if backend_config is None: + raise ValueError( + f"Unregistered SQL backend '{self.reference.backend}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}" + ) + if backend_config.type == StorageBackendType.SQL_SQLITE: + self.enable_write_queue = False await self.sql_store.create_table( "openai_responses", { diff --git a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py index e1da4db6e..3dfc82677 100644 --- a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py @@ -12,10 +12,10 @@ from llama_stack.core.access_control.conditions import ProtectedResource from llama_stack.core.access_control.datatypes import AccessRule, Action, Scope from llama_stack.core.datatypes import User from llama_stack.core.request_headers import get_authenticated_user +from llama_stack.core.storage.datatypes import StorageBackendType from llama_stack.log import get_logger from .api import ColumnDefinition, ColumnType, PaginatedResponse, SqlStore -from .sqlstore import SqlStoreType logger = get_logger(name=__name__, category="providers::utils") @@ -82,8 +82,8 @@ class AuthorizedSqlStore: if not hasattr(self.sql_store, "config"): raise ValueError("SqlStore must have a config attribute to be used with AuthorizedSqlStore") - self.database_type = self.sql_store.config.type - if self.database_type not in [SqlStoreType.postgres, SqlStoreType.sqlite]: + self.database_type = self.sql_store.config.type.value + if self.database_type not in [StorageBackendType.SQL_POSTGRES.value, StorageBackendType.SQL_SQLITE.value]: raise ValueError(f"Unsupported database type: {self.database_type}") def _validate_sql_optimized_policy(self) -> None: @@ -220,9 +220,9 @@ class AuthorizedSqlStore: Returns: SQL expression to extract JSON value """ - if self.database_type == SqlStoreType.postgres: + if self.database_type == StorageBackendType.SQL_POSTGRES.value: return f"{column}->'{path}'" - elif self.database_type == SqlStoreType.sqlite: + elif self.database_type == StorageBackendType.SQL_SQLITE.value: return f"JSON_EXTRACT({column}, '$.{path}')" else: 
raise ValueError(f"Unsupported database type: {self.database_type}") @@ -237,9 +237,9 @@ class AuthorizedSqlStore: Returns: SQL expression to extract JSON value as text """ - if self.database_type == SqlStoreType.postgres: + if self.database_type == StorageBackendType.SQL_POSTGRES.value: return f"{column}->>'{path}'" - elif self.database_type == SqlStoreType.sqlite: + elif self.database_type == StorageBackendType.SQL_SQLITE.value: return f"JSON_EXTRACT({column}, '$.{path}')" else: raise ValueError(f"Unsupported database type: {self.database_type}") @@ -248,10 +248,10 @@ class AuthorizedSqlStore: """Get the SQL conditions for public access.""" # Public records are records that have no owner_principal or access_attributes conditions = ["owner_principal = ''"] - if self.database_type == SqlStoreType.postgres: + if self.database_type == StorageBackendType.SQL_POSTGRES.value: # Postgres stores JSON null as 'null' conditions.append("access_attributes::text = 'null'") - elif self.database_type == SqlStoreType.sqlite: + elif self.database_type == StorageBackendType.SQL_SQLITE.value: conditions.append("access_attributes = 'null'") else: raise ValueError(f"Unsupported database type: {self.database_type}") diff --git a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index 23cd6444e..c1ccd73dd 100644 --- a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -26,10 +26,10 @@ from sqlalchemy.ext.asyncio.engine import AsyncEngine from sqlalchemy.sql.elements import ColumnElement from llama_stack.apis.common.responses import PaginatedResponse +from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig from llama_stack.log import get_logger from .api import ColumnDefinition, ColumnType, SqlStore -from .sqlstore import SqlAlchemySqlStoreConfig logger = get_logger(name=__name__, category="providers::utils") diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py index fc44402ae..31801c4ca 100644 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -4,90 +4,28 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from abc import abstractmethod -from enum import StrEnum -from pathlib import Path -from typing import Annotated, Literal +from typing import Annotated, cast -from pydantic import BaseModel, Field +from pydantic import Field -from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.core.storage.datatypes import ( + PostgresSqlStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageBackendConfig, + StorageBackendType, +) from .api import SqlStore sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"] - -class SqlStoreType(StrEnum): - sqlite = "sqlite" - postgres = "postgres" - - -class SqlAlchemySqlStoreConfig(BaseModel): - @property - @abstractmethod - def engine_str(self) -> str: ... - - # TODO: move this when we have a better way to specify dependencies with internal APIs - @classmethod - def pip_packages(cls) -> list[str]: - return ["sqlalchemy[asyncio]"] - - -class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): - type: Literal[SqlStoreType.sqlite] = SqlStoreType.sqlite - db_path: str = Field( - default=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(), - description="Database path, e.g. 
~/.llama/distributions/ollama/sqlstore.db", - ) - - @property - def engine_str(self) -> str: - return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix() - - @classmethod - def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): - return { - "type": "sqlite", - "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, - } - - @classmethod - def pip_packages(cls) -> list[str]: - return super().pip_packages() + ["aiosqlite"] - - -class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): - type: Literal[SqlStoreType.postgres] = SqlStoreType.postgres - host: str = "localhost" - port: int = 5432 - db: str = "llamastack" - user: str - password: str | None = None - - @property - def engine_str(self) -> str: - return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}" - - @classmethod - def pip_packages(cls) -> list[str]: - return super().pip_packages() + ["asyncpg"] - - @classmethod - def sample_run_config(cls, **kwargs): - return { - "type": "postgres", - "host": "${env.POSTGRES_HOST:=localhost}", - "port": "${env.POSTGRES_PORT:=5432}", - "db": "${env.POSTGRES_DB:=llamastack}", - "user": "${env.POSTGRES_USER:=llamastack}", - "password": "${env.POSTGRES_PASSWORD:=llamastack}", - } +_SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {} SqlStoreConfig = Annotated[ SqliteSqlStoreConfig | PostgresSqlStoreConfig, - Field(discriminator="type", default=SqlStoreType.sqlite.value), + Field(discriminator="type"), ] @@ -95,9 +33,9 @@ def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]: """Get pip packages for SQL store config, handling both dict and object cases.""" if isinstance(store_config, dict): store_type = store_config.get("type") - if store_type == "sqlite": + if store_type == StorageBackendType.SQL_SQLITE.value: return SqliteSqlStoreConfig.pip_packages() - elif store_type == "postgres": + elif store_type == StorageBackendType.SQL_POSTGRES.value: return PostgresSqlStoreConfig.pip_packages() else: raise ValueError(f"Unknown SQL store type: {store_type}") @@ -105,12 +43,28 @@ def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]: return store_config.pip_packages() -def sqlstore_impl(config: SqlStoreConfig) -> SqlStore: - if config.type in [SqlStoreType.sqlite, SqlStoreType.postgres]: +def sqlstore_impl(reference: SqlStoreReference) -> SqlStore: + backend_name = reference.backend + + backend_config = _SQLSTORE_BACKENDS.get(backend_name) + if backend_config is None: + raise ValueError( + f"Unknown SQL store backend '{backend_name}'. 
Registered backends: {sorted(_SQLSTORE_BACKENDS)}" + ) + + if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig): from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl - impl = SqlAlchemySqlStoreImpl(config) + config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy() + return SqlAlchemySqlStoreImpl(config) else: - raise ValueError(f"Unknown sqlstore type {config.type}") + raise ValueError(f"Unknown sqlstore type {backend_config.type}") - return impl + +def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None: + """Register the set of available SQL store backends for reference resolution.""" + global _SQLSTORE_BACKENDS + + _SQLSTORE_BACKENDS.clear() + for name, cfg in backends.items(): + _SQLSTORE_BACKENDS[name] = cfg diff --git a/scripts/docker.sh b/scripts/docker.sh index 1ba1d9adf..7a5c3e6e0 100755 --- a/scripts/docker.sh +++ b/scripts/docker.sh @@ -236,7 +236,7 @@ start_container() { echo "=== Starting Docker Container ===" # Get the repo root for volume mount - SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd) REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd) # Determine the actual image name (may have localhost/ prefix) diff --git a/tests/external/run-byoa.yaml b/tests/external/run-byoa.yaml index 5774ae9da..4d63046c6 100644 --- a/tests/external/run-byoa.yaml +++ b/tests/external/run-byoa.yaml @@ -7,6 +7,24 @@ providers: - provider_id: kaze provider_type: remote::kaze config: {} +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/external}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/external}/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + conversations: + table_name: openai_conversations + backend: sql_default external_apis_dir: ~/.llama/apis.d external_providers_dir: ~/.llama/providers.d server: diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 68a30fc69..eb6840e60 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -238,7 +238,7 @@ def instantiate_llama_stack_client(session): run_config = run_config_from_adhoc_config_spec(config) run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml") with open(run_config_file.name, "w") as f: - yaml.dump(run_config.model_dump(), f) + yaml.dump(run_config.model_dump(mode="json"), f) config = run_config_file.name client = LlamaStackAsLibraryClient( diff --git a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py index 98bef0f2c..ad9115756 100644 --- a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py +++ b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py @@ -12,9 +12,15 @@ import pytest from llama_stack.core.access_control.access_control import default_policy from llama_stack.core.datatypes import User +from llama_stack.core.storage.datatypes import SqlStoreReference from llama_stack.providers.utils.sqlstore.api import ColumnType from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore -from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig, SqliteSqlStoreConfig, sqlstore_impl +from 
llama_stack.providers.utils.sqlstore.sqlstore import ( + PostgresSqlStoreConfig, + SqliteSqlStoreConfig, + register_sqlstore_backends, + sqlstore_impl, +) def get_postgres_config(): @@ -55,8 +61,9 @@ def authorized_store(backend_config): config_func = backend_config config = config_func() - - base_sqlstore = sqlstore_impl(config) + backend_name = f"sql_{type(config).__name__.lower()}" + register_sqlstore_backends({backend_name: config}) + base_sqlstore = sqlstore_impl(SqlStoreReference(backend=backend_name, table_name="authorized_store")) authorized_store = AuthorizedSqlStore(base_sqlstore, default_policy()) yield authorized_store diff --git a/tests/integration/test_persistence_integration.py b/tests/integration/test_persistence_integration.py new file mode 100644 index 000000000..e9b80dc0c --- /dev/null +++ b/tests/integration/test_persistence_integration.py @@ -0,0 +1,71 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import yaml + +from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.storage.datatypes import ( + PostgresKVStoreConfig, + PostgresSqlStoreConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, +) + + +def test_starter_distribution_config_loads_and_resolves(): + """Integration: Actual starter config should parse and have correct storage structure.""" + with open("llama_stack/distributions/starter/run.yaml") as f: + config_dict = yaml.safe_load(f) + + config = StackRunConfig(**config_dict) + + # Config should have named backends and explicit store references + assert config.storage is not None + assert "kv_default" in config.storage.backends + assert "sql_default" in config.storage.backends + assert isinstance(config.storage.backends["kv_default"], SqliteKVStoreConfig) + assert isinstance(config.storage.backends["sql_default"], SqliteSqlStoreConfig) + + stores = config.storage.stores + assert stores.metadata is not None + assert stores.metadata.backend == "kv_default" + assert stores.metadata.namespace == "registry" + + assert stores.inference is not None + assert stores.inference.backend == "sql_default" + assert stores.inference.table_name == "inference_store" + assert stores.inference.max_write_queue_size > 0 + assert stores.inference.num_writers > 0 + + assert stores.conversations is not None + assert stores.conversations.backend == "sql_default" + assert stores.conversations.table_name == "openai_conversations" + + +def test_postgres_demo_distribution_config_loads(): + """Integration: Postgres demo should use Postgres backend for all stores.""" + with open("llama_stack/distributions/postgres-demo/run.yaml") as f: + config_dict = yaml.safe_load(f) + + config = StackRunConfig(**config_dict) + + # Should have postgres backend + assert config.storage is not None + assert "kv_default" in config.storage.backends + assert "sql_default" in config.storage.backends + postgres_backend = config.storage.backends["sql_default"] + assert isinstance(postgres_backend, PostgresSqlStoreConfig) + assert postgres_backend.host == "${env.POSTGRES_HOST:=localhost}" + + kv_backend = config.storage.backends["kv_default"] + assert isinstance(kv_backend, PostgresKVStoreConfig) + + stores = config.storage.stores + # Stores target the Postgres backends explicitly + assert stores.metadata is not None + assert stores.metadata.backend == "kv_default" + assert stores.inference is not None + assert 
stores.inference.backend == "sql_default" diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py index daaf229e5..7b9f3ca0c 100644 --- a/tests/unit/cli/test_stack_config.py +++ b/tests/unit/cli/test_stack_config.py @@ -23,6 +23,27 @@ def config_with_image_name_int(): image_name: 1234 apis_to_serve: [] built_at: {datetime.now().isoformat()} + storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/test_kv.db + sql_default: + type: sql_sqlite + db_path: /tmp/test_sql.db + stores: + metadata: + backend: kv_default + namespace: metadata + inference: + backend: sql_default + table_name: inference + conversations: + backend: sql_default + table_name: conversations + responses: + backend: sql_default + table_name: responses providers: inference: - provider_id: provider1 @@ -54,6 +75,27 @@ def up_to_date_config(): image_name: foo apis_to_serve: [] built_at: {datetime.now().isoformat()} + storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/test_kv.db + sql_default: + type: sql_sqlite + db_path: /tmp/test_sql.db + stores: + metadata: + backend: kv_default + namespace: metadata + inference: + backend: sql_default + table_name: inference + conversations: + backend: sql_default + table_name: conversations + responses: + backend: sql_default + table_name: responses providers: inference: - provider_id: provider1 diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index 65c3e2333..ff6dd243d 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -20,7 +20,14 @@ from llama_stack.core.conversations.conversations import ( ConversationServiceConfig, ConversationServiceImpl, ) -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.storage.datatypes import ( + ServerStoresConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends @pytest.fixture @@ -28,7 +35,18 @@ async def service(): with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "test_conversations.db" - config = ConversationServiceConfig(conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=[]) + storage = StorageConfig( + backends={ + "sql_test": SqliteSqlStoreConfig(db_path=str(db_path)), + }, + stores=ServerStoresConfig( + conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"), + ), + ) + register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) + run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage) + + config = ConversationServiceConfig(run_config=run_config, policy=[]) service = ConversationServiceImpl(config, {}) await service.initialize() yield service @@ -121,9 +139,18 @@ async def test_policy_configuration(): AccessRule(forbid=Scope(principal="test_user", actions=[Action.CREATE, Action.READ], resource="*")) ] - config = ConversationServiceConfig( - conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=restrictive_policy + storage = StorageConfig( + backends={ + "sql_test": SqliteSqlStoreConfig(db_path=str(db_path)), + }, + stores=ServerStoresConfig( + conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"), + ), ) + register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) + run_config = 
StackRunConfig(image_name="test", apis=[], providers={}, storage=storage) + + config = ConversationServiceConfig(run_config=run_config, policy=restrictive_policy) service = ConversationServiceImpl(config, {}) await service.initialize() diff --git a/tests/unit/core/test_storage_references.py b/tests/unit/core/test_storage_references.py new file mode 100644 index 000000000..7bceba74d --- /dev/null +++ b/tests/unit/core/test_storage_references.py @@ -0,0 +1,84 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Unit tests for storage backend/reference validation.""" + +import pytest +from pydantic import ValidationError + +from llama_stack.core.datatypes import ( + LLAMA_STACK_RUN_CONFIG_VERSION, + StackRunConfig, +) +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) + + +def _base_run_config(**overrides): + metadata_reference = overrides.pop( + "metadata_reference", + KVStoreReference(backend="kv_default", namespace="registry"), + ) + inference_reference = overrides.pop( + "inference_reference", + InferenceStoreReference(backend="sql_default", table_name="inference"), + ) + conversations_reference = overrides.pop( + "conversations_reference", + SqlStoreReference(backend="sql_default", table_name="conversations"), + ) + storage = overrides.pop( + "storage", + StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path="/tmp/kv.db"), + "sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql.db"), + }, + stores=ServerStoresConfig( + metadata=metadata_reference, + inference=inference_reference, + conversations=conversations_reference, + ), + ), + ) + return StackRunConfig( + version=LLAMA_STACK_RUN_CONFIG_VERSION, + image_name="test-distro", + apis=[], + providers={}, + storage=storage, + **overrides, + ) + + +def test_references_require_known_backend(): + with pytest.raises(ValidationError, match="unknown backend 'missing'"): + _base_run_config(metadata_reference=KVStoreReference(backend="missing", namespace="registry")) + + +def test_references_must_match_backend_family(): + with pytest.raises(ValidationError, match="kv_.* is required"): + _base_run_config(metadata_reference=KVStoreReference(backend="sql_default", namespace="registry")) + + with pytest.raises(ValidationError, match="sql_.* is required"): + _base_run_config( + inference_reference=InferenceStoreReference(backend="kv_default", table_name="inference"), + ) + + +def test_valid_configuration_passes_validation(): + config = _base_run_config() + stores = config.storage.stores + assert stores.metadata is not None and stores.metadata.backend == "kv_default" + assert stores.inference is not None and stores.inference.backend == "sql_default" + assert stores.conversations is not None and stores.conversations.backend == "sql_default" diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py index 08a376008..3b0643a13 100644 --- a/tests/unit/distribution/test_distribution.py +++ b/tests/unit/distribution/test_distribution.py @@ -13,6 +13,15 @@ from pydantic import BaseModel, Field, ValidationError from llama_stack.core.datatypes import Api, Provider, StackRunConfig from llama_stack.core.distribution import INTERNAL_APIS, get_provider_registry, providable_apis +from 
llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) from llama_stack.providers.datatypes import ProviderSpec @@ -29,6 +38,32 @@ class SampleConfig(BaseModel): } +def _default_storage() -> StorageConfig: + return StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=":memory:"), + "sql_default": SqliteSqlStoreConfig(db_path=":memory:"), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="conversations"), + ), + ) + + +def make_stack_config(**overrides) -> StackRunConfig: + storage = overrides.pop("storage", _default_storage()) + defaults = dict( + image_name="test_image", + apis=[], + providers={}, + storage=storage, + ) + defaults.update(overrides) + return StackRunConfig(**defaults) + + @pytest.fixture def mock_providers(): """Mock the available_providers function to return test providers.""" @@ -47,8 +82,8 @@ def mock_providers(): @pytest.fixture def base_config(tmp_path): """Create a base StackRunConfig with common settings.""" - return StackRunConfig( - image_name="test_image", + return make_stack_config( + apis=["inference"], providers={ "inference": [ Provider( @@ -222,8 +257,8 @@ class TestProviderRegistry: def test_missing_directory(self, mock_providers): """Test handling of missing external providers directory.""" - config = StackRunConfig( - image_name="test_image", + config = make_stack_config( + apis=["inference"], providers={ "inference": [ Provider( @@ -278,7 +313,6 @@ pip_packages: """Test loading an external provider from a module (success path).""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.providers.datatypes import Api, ProviderSpec # Simulate a provider module with get_provider_spec @@ -293,7 +327,7 @@ pip_packages: import_module_side_effect = make_import_module_side_effect(external_module=fake_module) with patch("importlib.import_module", side_effect=import_module_side_effect) as mock_import: - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -317,12 +351,11 @@ pip_packages: def test_external_provider_from_module_not_found(self, mock_providers): """Test handling ModuleNotFoundError for missing provider module.""" - from llama_stack.core.datatypes import Provider, StackRunConfig import_module_side_effect = make_import_module_side_effect(raise_for_external=True) with patch("importlib.import_module", side_effect=import_module_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -341,12 +374,11 @@ pip_packages: def test_external_provider_from_module_missing_get_provider_spec(self, mock_providers): """Test handling missing get_provider_spec in provider module (should raise ValueError).""" - from llama_stack.core.datatypes import Provider, StackRunConfig import_module_side_effect = make_import_module_side_effect(missing_get_provider_spec=True) with patch("importlib.import_module", side_effect=import_module_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -399,13 +431,12 @@ class TestGetExternalProvidersFromModule: def 
test_stackrunconfig_provider_without_module(self, mock_providers): """Test that providers without module attribute are skipped.""" - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module import_module_side_effect = make_import_module_side_effect() with patch("importlib.import_module", side_effect=import_module_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -426,7 +457,6 @@ class TestGetExternalProvidersFromModule: """Test provider with module containing version spec (e.g., package==1.0.0).""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -444,7 +474,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -564,7 +594,6 @@ class TestGetExternalProvidersFromModule: """Test when get_provider_spec returns a list of specs.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -589,7 +618,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -613,7 +642,6 @@ class TestGetExternalProvidersFromModule: """Test that list return filters specs by provider_type.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -638,7 +666,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -662,7 +690,6 @@ class TestGetExternalProvidersFromModule: """Test that list return adds multiple different provider_types when config requests them.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -688,7 +715,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -718,7 +745,6 @@ class TestGetExternalProvidersFromModule: def test_module_not_found_raises_value_error(self, mock_providers): """Test that ModuleNotFoundError raises ValueError with helpful message.""" - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module def import_side_effect(name): @@ -727,7 +753,7 @@ class TestGetExternalProvidersFromModule: raise 
ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -751,7 +777,6 @@ class TestGetExternalProvidersFromModule: """Test that generic exceptions are properly raised.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module def bad_spec(): @@ -765,7 +790,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ @@ -787,10 +812,9 @@ class TestGetExternalProvidersFromModule: def test_empty_provider_list(self, mock_providers): """Test with empty provider list.""" - from llama_stack.core.datatypes import StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={}, ) @@ -805,7 +829,6 @@ class TestGetExternalProvidersFromModule: """Test multiple APIs with providers.""" from types import SimpleNamespace - from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.core.distribution import get_external_providers_from_module from llama_stack.providers.datatypes import ProviderSpec @@ -830,7 +853,7 @@ class TestGetExternalProvidersFromModule: raise ModuleNotFoundError(name) with patch("importlib.import_module", side_effect=import_side_effect): - config = StackRunConfig( + config = make_stack_config( image_name="test_image", providers={ "inference": [ diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py index e14e033b9..426e2cf64 100644 --- a/tests/unit/files/test_files.py +++ b/tests/unit/files/test_files.py @@ -11,11 +11,12 @@ from llama_stack.apis.common.errors import ResourceNotFoundError from llama_stack.apis.common.responses import Order from llama_stack.apis.files import OpenAIFilePurpose from llama_stack.core.access_control.access_control import default_policy +from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference from llama_stack.providers.inline.files.localfs import ( LocalfsFilesImpl, LocalfsFilesImplConfig, ) -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends class MockUploadFile: @@ -36,8 +37,11 @@ async def files_provider(tmp_path): storage_dir = tmp_path / "files" db_path = tmp_path / "files_metadata.db" + backend_name = "sql_localfs_test" + register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path.as_posix())}) config = LocalfsFilesImplConfig( - storage_dir=storage_dir.as_posix(), metadata_store=SqliteSqlStoreConfig(db_path=db_path.as_posix()) + storage_dir=storage_dir.as_posix(), + metadata_store=SqlStoreReference(backend=backend_name, table_name="files_metadata"), ) provider = LocalfsFilesImpl(config, default_policy()) diff --git a/tests/unit/prompts/prompts/conftest.py b/tests/unit/prompts/prompts/conftest.py index b2c619e49..fe30e1a77 100644 --- a/tests/unit/prompts/prompts/conftest.py +++ b/tests/unit/prompts/prompts/conftest.py @@ -9,7 +9,16 @@ import random import pytest from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl -from 
llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) +from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends @pytest.fixture @@ -19,12 +28,28 @@ async def temp_prompt_store(tmp_path_factory): db_path = str(temp_dir / f"{unique_id}.db") from llama_stack.core.datatypes import StackRunConfig - from llama_stack.providers.utils.kvstore import kvstore_impl - mock_run_config = StackRunConfig(image_name="test-distribution", apis=[], providers={}) + storage = StorageConfig( + backends={ + "kv_test": SqliteKVStoreConfig(db_path=db_path), + "sql_test": SqliteSqlStoreConfig(db_path=str(temp_dir / f"{unique_id}_sql.db")), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_test", namespace="registry"), + inference=InferenceStoreReference(backend="sql_test", table_name="inference"), + conversations=SqlStoreReference(backend="sql_test", table_name="conversations"), + ), + ) + mock_run_config = StackRunConfig( + image_name="test-distribution", + apis=[], + providers={}, + storage=storage, + ) config = PromptServiceConfig(run_config=mock_run_config) store = PromptServiceImpl(config, deps={}) - store.kvstore = await kvstore_impl(SqliteKVStoreConfig(db_path=db_path)) + register_kvstore_backends({"kv_test": storage.backends["kv_test"]}) + store.kvstore = await kvstore_impl(KVStoreReference(backend="kv_test", namespace="prompts")) yield store diff --git a/tests/unit/providers/agent/test_meta_reference_agent.py b/tests/unit/providers/agent/test_meta_reference_agent.py index cfb3e1327..dfd9b6d52 100644 --- a/tests/unit/providers/agent/test_meta_reference_agent.py +++ b/tests/unit/providers/agent/test_meta_reference_agent.py @@ -26,6 +26,20 @@ from llama_stack.providers.inline.agents.meta_reference.config import MetaRefere from llama_stack.providers.inline.agents.meta_reference.persistence import AgentInfo +@pytest.fixture(autouse=True) +def setup_backends(tmp_path): + """Register KV and SQL store backends for testing.""" + from llama_stack.core.storage.datatypes import SqliteKVStoreConfig, SqliteSqlStoreConfig + from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends + from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + kv_path = str(tmp_path / "test_kv.db") + sql_path = str(tmp_path / "test_sql.db") + + register_kvstore_backends({"kv_default": SqliteKVStoreConfig(db_path=kv_path)}) + register_sqlstore_backends({"sql_default": SqliteSqlStoreConfig(db_path=sql_path)}) + + @pytest.fixture def mock_apis(): return { @@ -40,15 +54,20 @@ def mock_apis(): @pytest.fixture def config(tmp_path): + from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference + from llama_stack.providers.inline.agents.meta_reference.config import AgentPersistenceConfig + return MetaReferenceAgentsImplConfig( - persistence_store={ - "type": "sqlite", - "db_path": str(tmp_path / "test.db"), - }, - responses_store={ - "type": "sqlite", - "db_path": str(tmp_path / "test.db"), - }, + persistence=AgentPersistenceConfig( + agent_state=KVStoreReference( + backend="kv_default", + namespace="agents", + ), + responses=ResponsesStoreReference( + backend="sql_default", + table_name="responses", + ), + ) ) diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py 
b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 54c1820fb..f31ec0c28 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -42,7 +42,7 @@ from llama_stack.apis.inference import ( ) from llama_stack.apis.tools.tools import ListToolDefsResponse, ToolDef, ToolGroups, ToolInvocationResult, ToolRuntime from llama_stack.core.access_control.access_control import default_policy -from llama_stack.core.datatypes import ResponsesStoreConfig +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import ( OpenAIResponsesImpl, ) @@ -50,7 +50,7 @@ from llama_stack.providers.utils.responses.responses_store import ( ResponsesStore, _OpenAIResponseObjectWithInputAndMessages, ) -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends from tests.unit.providers.agents.meta_reference.fixtures import load_chat_completion_fixture @@ -917,8 +917,10 @@ async def test_responses_store_list_input_items_logic(): # Create mock store and response store mock_sql_store = AsyncMock() + backend_name = "sql_responses_test" + register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path="mock_db_path")}) responses_store = ResponsesStore( - ResponsesStoreConfig(sql_store_config=SqliteSqlStoreConfig(db_path="mock_db_path")), policy=default_policy() + ResponsesStoreReference(backend=backend_name, table_name="responses"), policy=default_policy() ) responses_store.sql_store = mock_sql_store diff --git a/tests/unit/providers/batches/conftest.py b/tests/unit/providers/batches/conftest.py index df37141b5..d161bf976 100644 --- a/tests/unit/providers/batches/conftest.py +++ b/tests/unit/providers/batches/conftest.py @@ -12,10 +12,10 @@ from unittest.mock import AsyncMock import pytest +from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.providers.inline.batches.reference.batches import ReferenceBatchesImpl from llama_stack.providers.inline.batches.reference.config import ReferenceBatchesImplConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends @pytest.fixture @@ -23,8 +23,10 @@ async def provider(): """Create a test provider instance with temporary database.""" with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "test_batches.db" + backend_name = "kv_batches_test" kvstore_config = SqliteKVStoreConfig(db_path=str(db_path)) - config = ReferenceBatchesImplConfig(kvstore=kvstore_config) + register_kvstore_backends({backend_name: kvstore_config}) + config = ReferenceBatchesImplConfig(kvstore=KVStoreReference(backend=backend_name, namespace="batches")) # Create kvstore and mock APIs kvstore = await kvstore_impl(config.kvstore) diff --git a/tests/unit/providers/files/conftest.py b/tests/unit/providers/files/conftest.py index 46282e3dc..c64ecc3a3 100644 --- a/tests/unit/providers/files/conftest.py +++ b/tests/unit/providers/files/conftest.py @@ -8,8 +8,9 @@ import boto3 import pytest from moto import mock_aws +from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference from 
llama_stack.providers.remote.files.s3 import S3FilesImplConfig, get_adapter_impl -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends class MockUploadFile: @@ -38,11 +39,13 @@ def sample_text_file2(): def s3_config(tmp_path): db_path = tmp_path / "s3_files_metadata.db" + backend_name = f"sql_s3_{tmp_path.name}" + register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path.as_posix())}) return S3FilesImplConfig( bucket_name=f"test-bucket-{tmp_path.name}", region="not-a-region", auto_create_bucket=True, - metadata_store=SqliteSqlStoreConfig(db_path=db_path.as_posix()), + metadata_store=SqlStoreReference(backend=backend_name, table_name="s3_files_metadata"), ) diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 8e5c85cf1..6d0367beb 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -12,13 +12,14 @@ import pytest from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse +from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig from llama_stack.providers.remote.vector_io.pgvector.pgvector import PGVectorIndex, PGVectorVectorIOAdapter -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore import register_kvstore_backends EMBEDDING_DIMENSION = 768 COLLECTION_PREFIX = "test_collection" @@ -112,8 +113,9 @@ async def unique_kvstore_config(tmp_path_factory): unique_id = f"test_kv_{np.random.randint(1e6)}" temp_dir = tmp_path_factory.getbasetemp() db_path = str(temp_dir / f"{unique_id}.db") - - return SqliteKVStoreConfig(db_path=db_path) + backend_name = f"kv_vector_{unique_id}" + register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=db_path)}) + return KVStoreReference(backend=backend_name, namespace=f"vector_io::{unique_id}") @pytest.fixture(scope="session") @@ -138,7 +140,7 @@ async def sqlite_vec_vec_index(embedding_dimension, tmp_path_factory): async def sqlite_vec_adapter(sqlite_vec_db_path, unique_kvstore_config, mock_inference_api, embedding_dimension): config = SQLiteVectorIOConfig( db_path=sqlite_vec_db_path, - kvstore=unique_kvstore_config, + persistence=unique_kvstore_config, ) adapter = SQLiteVecVectorIOAdapter( config=config, @@ -177,7 +179,7 @@ async def faiss_vec_index(embedding_dimension): @pytest.fixture async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding_dimension): config = FaissVectorIOConfig( - kvstore=unique_kvstore_config, + persistence=unique_kvstore_config, ) adapter = FaissVectorIOAdapter( config=config, @@ -253,7 +255,7 @@ async def pgvector_vec_adapter(unique_kvstore_config, mock_inference_api, embedd db="test_db", user="test_user", password="test_password", - kvstore=unique_kvstore_config, + persistence=unique_kvstore_config, ) adapter = PGVectorVectorIOAdapter(config, 
mock_inference_api, None) diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index e49c9dc77..95022ad33 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -10,13 +10,13 @@ import pytest from llama_stack.apis.inference import Model from llama_stack.apis.vector_dbs import VectorDB from llama_stack.core.datatypes import VectorDBWithOwner +from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.core.store.registry import ( KEY_FORMAT, CachedDiskDistributionRegistry, DiskDistributionRegistry, ) -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends @pytest.fixture @@ -72,7 +72,11 @@ async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, # Test cached version loads from disk db_path = sqlite_kvstore.db_path - cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(SqliteKVStoreConfig(db_path=db_path))) + backend_name = "kv_cached_test" + register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=db_path)}) + cached_registry = CachedDiskDistributionRegistry( + await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry")) + ) await cached_registry.initialize() result_vector_db = await cached_registry.get("vector_db", "test_vector_db") @@ -101,7 +105,11 @@ async def test_cached_registry_updates(cached_disk_dist_registry): # Verify persisted to disk db_path = cached_disk_dist_registry.kvstore.db_path - new_registry = DiskDistributionRegistry(await kvstore_impl(SqliteKVStoreConfig(db_path=db_path))) + backend_name = "kv_cached_new" + register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=db_path)}) + new_registry = DiskDistributionRegistry( + await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry")) + ) await new_registry.initialize() result_vector_db = await new_registry.get("vector_db", "test_vector_db_2") assert result_vector_db is not None diff --git a/tests/unit/server/test_quota.py b/tests/unit/server/test_quota.py index 85acbc66a..16b1772ce 100644 --- a/tests/unit/server/test_quota.py +++ b/tests/unit/server/test_quota.py @@ -4,6 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+from uuid import uuid4 + import pytest from fastapi import FastAPI, Request from fastapi.testclient import TestClient @@ -11,7 +13,8 @@ from starlette.middleware.base import BaseHTTPMiddleware from llama_stack.core.datatypes import QuotaConfig, QuotaPeriod from llama_stack.core.server.quota import QuotaMiddleware -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore import register_kvstore_backends class InjectClientIDMiddleware(BaseHTTPMiddleware): @@ -29,8 +32,10 @@ class InjectClientIDMiddleware(BaseHTTPMiddleware): def build_quota_config(db_path) -> QuotaConfig: + backend_name = f"kv_quota_{uuid4().hex}" + register_kvstore_backends({backend_name: SqliteKVStoreConfig(db_path=str(db_path))}) return QuotaConfig( - kvstore=SqliteKVStoreConfig(db_path=str(db_path)), + kvstore=KVStoreReference(backend=backend_name, namespace="quota"), anonymous_max_requests=1, authenticated_max_requests=2, period=QuotaPeriod.DAY, diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index 1ee1b2f47..b44f12f7e 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -12,15 +12,22 @@ from unittest.mock import AsyncMock, MagicMock from pydantic import BaseModel, Field from llama_stack.apis.inference import Inference -from llama_stack.core.datatypes import ( - Api, - Provider, - StackRunConfig, -) +from llama_stack.core.datatypes import Api, Provider, StackRunConfig from llama_stack.core.resolver import resolve_impls from llama_stack.core.routers.inference import InferenceRouter from llama_stack.core.routing_tables.models import ModelsRoutingTable +from llama_stack.core.storage.datatypes import ( + InferenceStoreReference, + KVStoreReference, + ServerStoresConfig, + SqliteKVStoreConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec +from llama_stack.providers.utils.kvstore import register_kvstore_backends +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends def add_protocol_methods(cls: type, protocol: type[Protocol]) -> None: @@ -65,6 +72,35 @@ class SampleImpl: pass +def make_run_config(**overrides) -> StackRunConfig: + storage = overrides.pop( + "storage", + StorageConfig( + backends={ + "kv_default": SqliteKVStoreConfig(db_path=":memory:"), + "sql_default": SqliteSqlStoreConfig(db_path=":memory:"), + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="conversations"), + ), + ), + ) + register_kvstore_backends({name: cfg for name, cfg in storage.backends.items() if cfg.type.value.startswith("kv_")}) + register_sqlstore_backends( + {name: cfg for name, cfg in storage.backends.items() if cfg.type.value.startswith("sql_")} + ) + defaults = dict( + image_name="test_image", + apis=[], + providers={}, + storage=storage, + ) + defaults.update(overrides) + return StackRunConfig(**defaults) + + async def test_resolve_impls_basic(): # Create a real provider spec provider_spec = InlineProviderSpec( @@ -78,7 +114,7 @@ async def test_resolve_impls_basic(): # Create provider registry with our provider provider_registry = {Api.inference: {provider_spec.provider_type: 
provider_spec}} - run_config = StackRunConfig( + run_config = make_run_config( image_name="test_image", providers={ "inference": [ diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py index f6d63490a..d2de1c759 100644 --- a/tests/unit/utils/inference/test_inference_store.py +++ b/tests/unit/utils/inference/test_inference_store.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import time -from tempfile import TemporaryDirectory import pytest @@ -16,8 +15,16 @@ from llama_stack.apis.inference import ( OpenAIUserMessageParam, Order, ) +from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.inference.inference_store import InferenceStore -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + +@pytest.fixture(autouse=True) +def setup_backends(tmp_path): + """Register SQL store backends for testing.""" + db_path = str(tmp_path / "test.db") + register_sqlstore_backends({"sql_default": SqliteSqlStoreConfig(db_path=db_path)}) def create_test_chat_completion( @@ -44,167 +51,162 @@ def create_test_chat_completion( async def test_inference_store_pagination_basic(): """Test basic pagination functionality.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Create test data with different timestamps - base_time = int(time.time()) - test_data = [ - ("zebra-task", base_time + 1), - ("apple-job", base_time + 2), - ("moon-work", base_time + 3), - ("banana-run", base_time + 4), - ("car-exec", base_time + 5), - ] + # Create test data with different timestamps + base_time = int(time.time()) + test_data = [ + ("zebra-task", base_time + 1), + ("apple-job", base_time + 2), + ("moon-work", base_time + 3), + ("banana-run", base_time + 4), + ("car-exec", base_time + 5), + ] - # Store test chat completions - for completion_id, timestamp in test_data: - completion = create_test_chat_completion(completion_id, timestamp) - input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] - await store.store_chat_completion(completion, input_messages) + # Store test chat completions + for completion_id, timestamp in test_data: + completion = create_test_chat_completion(completion_id, timestamp) + input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] + await store.store_chat_completion(completion, input_messages) - # Wait for all queued writes to complete - await store.flush() + # Wait for all queued writes to complete + await store.flush() - # Test 1: First page with limit=2, descending order (default) - result = await store.list_chat_completions(limit=2, order=Order.desc) - assert len(result.data) == 2 - assert result.data[0].id == "car-exec" # Most recent first - assert result.data[1].id == "banana-run" - assert result.has_more is True - assert result.last_id == "banana-run" + # Test 1: First page with limit=2, descending order (default) + result = await store.list_chat_completions(limit=2, order=Order.desc) + assert len(result.data) == 2 + assert result.data[0].id == "car-exec" # Most recent 
first + assert result.data[1].id == "banana-run" + assert result.has_more is True + assert result.last_id == "banana-run" - # Test 2: Second page using 'after' parameter - result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc) - assert len(result2.data) == 2 - assert result2.data[0].id == "moon-work" - assert result2.data[1].id == "apple-job" - assert result2.has_more is True + # Test 2: Second page using 'after' parameter + result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc) + assert len(result2.data) == 2 + assert result2.data[0].id == "moon-work" + assert result2.data[1].id == "apple-job" + assert result2.has_more is True - # Test 3: Final page - result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc) - assert len(result3.data) == 1 - assert result3.data[0].id == "zebra-task" - assert result3.has_more is False + # Test 3: Final page + result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc) + assert len(result3.data) == 1 + assert result3.data[0].id == "zebra-task" + assert result3.has_more is False async def test_inference_store_pagination_ascending(): """Test pagination with ascending order.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Create test data - base_time = int(time.time()) - test_data = [ - ("delta-item", base_time + 1), - ("charlie-task", base_time + 2), - ("alpha-work", base_time + 3), - ] + # Create test data + base_time = int(time.time()) + test_data = [ + ("delta-item", base_time + 1), + ("charlie-task", base_time + 2), + ("alpha-work", base_time + 3), + ] - # Store test chat completions - for completion_id, timestamp in test_data: - completion = create_test_chat_completion(completion_id, timestamp) - input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] - await store.store_chat_completion(completion, input_messages) + # Store test chat completions + for completion_id, timestamp in test_data: + completion = create_test_chat_completion(completion_id, timestamp) + input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] + await store.store_chat_completion(completion, input_messages) - # Wait for all queued writes to complete - await store.flush() + # Wait for all queued writes to complete + await store.flush() - # Test ascending order pagination - result = await store.list_chat_completions(limit=1, order=Order.asc) - assert len(result.data) == 1 - assert result.data[0].id == "delta-item" # Oldest first - assert result.has_more is True + # Test ascending order pagination + result = await store.list_chat_completions(limit=1, order=Order.asc) + assert len(result.data) == 1 + assert result.data[0].id == "delta-item" # Oldest first + assert result.has_more is True - # Second page with ascending order - result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc) - assert len(result2.data) == 1 - assert result2.data[0].id == "charlie-task" - assert result2.has_more is True + # Second page with ascending order + result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc) + assert len(result2.data) 
== 1 + assert result2.data[0].id == "charlie-task" + assert result2.has_more is True async def test_inference_store_pagination_with_model_filter(): """Test pagination combined with model filtering.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Create test data with different models - base_time = int(time.time()) - test_data = [ - ("xyz-task", base_time + 1, "model-a"), - ("def-work", base_time + 2, "model-b"), - ("pqr-job", base_time + 3, "model-a"), - ("abc-run", base_time + 4, "model-b"), - ] + # Create test data with different models + base_time = int(time.time()) + test_data = [ + ("xyz-task", base_time + 1, "model-a"), + ("def-work", base_time + 2, "model-b"), + ("pqr-job", base_time + 3, "model-a"), + ("abc-run", base_time + 4, "model-b"), + ] - # Store test chat completions - for completion_id, timestamp, model in test_data: - completion = create_test_chat_completion(completion_id, timestamp, model) - input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] - await store.store_chat_completion(completion, input_messages) + # Store test chat completions + for completion_id, timestamp, model in test_data: + completion = create_test_chat_completion(completion_id, timestamp, model) + input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] + await store.store_chat_completion(completion, input_messages) - # Wait for all queued writes to complete - await store.flush() + # Wait for all queued writes to complete + await store.flush() - # Test pagination with model filter - result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc) - assert len(result.data) == 1 - assert result.data[0].id == "pqr-job" # Most recent model-a - assert result.data[0].model == "model-a" - assert result.has_more is True + # Test pagination with model filter + result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc) + assert len(result.data) == 1 + assert result.data[0].id == "pqr-job" # Most recent model-a + assert result.data[0].model == "model-a" + assert result.has_more is True - # Second page with model filter - result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc) - assert len(result2.data) == 1 - assert result2.data[0].id == "xyz-task" - assert result2.data[0].model == "model-a" - assert result2.has_more is False + # Second page with model filter + result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc) + assert len(result2.data) == 1 + assert result2.data[0].id == "xyz-task" + assert result2.data[0].model == "model-a" + assert result2.has_more is False async def test_inference_store_pagination_invalid_after(): """Test error handling for invalid 'after' parameter.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Try to paginate with non-existent ID - with pytest.raises(ValueError, 
match="Record with id='non-existent' not found in table 'chat_completions'"): - await store.list_chat_completions(after="non-existent", limit=2) + # Try to paginate with non-existent ID + with pytest.raises(ValueError, match="Record with id='non-existent' not found in table 'chat_completions'"): + await store.list_chat_completions(after="non-existent", limit=2) async def test_inference_store_pagination_no_limit(): """Test pagination behavior when no limit is specified.""" - with TemporaryDirectory() as tmp_dir: - db_path = tmp_dir + "/test.db" - store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) - await store.initialize() + reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions") + store = InferenceStore(reference, policy=[]) + await store.initialize() - # Create test data - base_time = int(time.time()) - test_data = [ - ("omega-first", base_time + 1), - ("beta-second", base_time + 2), - ] + # Create test data + base_time = int(time.time()) + test_data = [ + ("omega-first", base_time + 1), + ("beta-second", base_time + 2), + ] - # Store test chat completions - for completion_id, timestamp in test_data: - completion = create_test_chat_completion(completion_id, timestamp) - input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] - await store.store_chat_completion(completion, input_messages) + # Store test chat completions + for completion_id, timestamp in test_data: + completion = create_test_chat_completion(completion_id, timestamp) + input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")] + await store.store_chat_completion(completion, input_messages) - # Wait for all queued writes to complete - await store.flush() + # Wait for all queued writes to complete + await store.flush() - # Test without limit - result = await store.list_chat_completions(order=Order.desc) - assert len(result.data) == 2 - assert result.data[0].id == "beta-second" # Most recent first - assert result.data[1].id == "omega-first" - assert result.has_more is False + # Test without limit + result = await store.list_chat_completions(order=Order.desc) + assert len(result.data) == 2 + assert result.data[0].id == "beta-second" # Most recent first + assert result.data[1].id == "omega-first" + assert result.has_more is False diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index c27b5a8e5..34cff3d3f 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -6,6 +6,7 @@ import time from tempfile import TemporaryDirectory +from uuid import uuid4 import pytest @@ -15,8 +16,18 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, ) from llama_stack.apis.inference import OpenAIMessageParam, OpenAIUserMessageParam +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig from llama_stack.providers.utils.responses.responses_store import ResponsesStore -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + +def build_store(db_path: str, policy: list | None = None) -> ResponsesStore: + backend_name = f"sql_responses_{uuid4().hex}" + register_sqlstore_backends({backend_name: SqliteSqlStoreConfig(db_path=db_path)}) + return ResponsesStore( + ResponsesStoreReference(backend=backend_name, 
table_name="responses"), + policy=policy or [], + ) def create_test_response_object( @@ -54,7 +65,7 @@ async def test_responses_store_pagination_basic(): """Test basic pagination functionality for responses store.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Create test data with different timestamps @@ -103,7 +114,7 @@ async def test_responses_store_pagination_ascending(): """Test pagination with ascending order.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Create test data @@ -141,7 +152,7 @@ async def test_responses_store_pagination_with_model_filter(): """Test pagination combined with model filtering.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Create test data with different models @@ -182,7 +193,7 @@ async def test_responses_store_pagination_invalid_after(): """Test error handling for invalid 'after' parameter.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Try to paginate with non-existent ID @@ -194,7 +205,7 @@ async def test_responses_store_pagination_no_limit(): """Test pagination behavior when no limit is specified.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Create test data @@ -226,7 +237,7 @@ async def test_responses_store_get_response_object(): """Test retrieving a single response object.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Store a test response @@ -254,7 +265,7 @@ async def test_responses_store_input_items_pagination(): """Test pagination functionality for input items.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Store a test response with many inputs with explicit IDs @@ -335,7 +346,7 @@ async def test_responses_store_input_items_before_pagination(): """Test before pagination functionality for input items.""" with TemporaryDirectory() as tmp_dir: db_path = tmp_dir + "/test.db" - store = ResponsesStore(SqliteSqlStoreConfig(db_path=db_path), policy=[]) + store = build_store(db_path) await store.initialize() # Store a test response with many inputs with explicit IDs From 48581bf651c334ea78d48b1866247020065c5d4b Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Mon, 20 Oct 2025 17:22:45 -0400 Subject: [PATCH 3/3] chore: Updating how default embedding model is set in stack (#3818) # What does this PR do? Refactor setting default vector store provider and embedding model to use an optional `vector_stores` config in the `StackRunConfig` and clean up code to do so (had to add back in some pieces of VectorDB). 
Also added remote Qdrant and Weaviate to the starter distro (based on another PR where inference providers were added for UX).

The new config is simple (the Starter distro default is shown below):
```yaml
vector_stores:
  default_provider_id: faiss
  default_embedding_model:
    provider_id: sentence-transformers
    model_id: nomic-ai/nomic-embed-text-v1.5
```

## Test Plan
CI and unit tests.

---------

Signed-off-by: Francisco Javier Arceo
Co-authored-by: Ashwin Bharambe
---
 .../workflows/integration-vector-io-tests.yml |   4 +-
 docs/docs/building_applications/rag.mdx       |  28 +-
 llama_stack/apis/datatypes.py                 |   1 +
 llama_stack/apis/vector_dbs/vector_dbs.py     |  34 +-
 llama_stack/core/datatypes.py                 |  25 ++
 llama_stack/core/distribution.py              |   4 +
 llama_stack/core/resolver.py                  |   2 +
 llama_stack/core/routers/__init__.py          |   5 +
 llama_stack/core/routers/vector_io.py         |  40 ++-
 llama_stack/core/routing_tables/common.py     |   3 +
 llama_stack/core/routing_tables/vector_dbs.py | 323 ++++++++++++++++++
 llama_stack/core/stack.py                     |  64 ++--
 llama_stack/distributions/ci-tests/build.yaml |   2 +
 llama_stack/distributions/ci-tests/run.yaml   |  20 ++
 .../distributions/starter-gpu/build.yaml      |   2 +
 .../distributions/starter-gpu/run.yaml        |  20 ++
 llama_stack/distributions/starter/build.yaml  |   2 +
 llama_stack/distributions/starter/run.yaml    |  20 ++
 llama_stack/distributions/starter/starter.py  |  29 ++
 llama_stack/distributions/template.py         |   9 +-
 .../sentence_transformers.py                  |   1 -
 .../inline/vector_io/chroma/__init__.py       |  11 +-
 .../inline/vector_io/faiss/__init__.py        |   7 +-
 .../providers/inline/vector_io/faiss/faiss.py |  64 +---
 .../inline/vector_io/milvus/__init__.py       |   7 +-
 .../inline/vector_io/qdrant/__init__.py       |   7 +-
 .../inline/vector_io/sqlite_vec/__init__.py   |   7 +-
 .../inline/vector_io/sqlite_vec/sqlite_vec.py |  62 +---
 .../remote/vector_io/chroma/__init__.py       |   7 +-
 .../remote/vector_io/chroma/chroma.py         |  57 +---
 .../remote/vector_io/milvus/__init__.py       |   8 +-
 .../remote/vector_io/milvus/milvus.py         |  81 +----
 .../remote/vector_io/pgvector/__init__.py     |   2 +-
 .../remote/vector_io/pgvector/pgvector.py     |  51 +--
 .../remote/vector_io/qdrant/__init__.py       |   7 +-
 .../remote/vector_io/qdrant/qdrant.py         |  46 +--
 .../remote/vector_io/weaviate/__init__.py     |   7 +-
 .../remote/vector_io/weaviate/config.py       |   6 +-
 .../remote/vector_io/weaviate/weaviate.py     |  84 +---
 .../utils/memory/openai_vector_store_mixin.py | 101 +-----
 tests/integration/conftest.py                 |  69 ++++
 tests/integration/fixtures/common.py          |   8 +
 .../vector_io/test_openai_vector_stores.py    | 155 +++++++--
 tests/integration/vector_io/test_vector_io.py |  57 +++-
 tests/unit/core/test_stack_validation.py      | 126 +++---
 tests/unit/providers/vector_io/conftest.py    |   2 -
 tests/unit/providers/vector_io/test_faiss.py  |  19 +-
 .../test_vector_io_openai_vector_stores.py    |  95 +----
 48 files changed, 973 insertions(+), 818 deletions(-)
 create mode 100644 llama_stack/core/routing_tables/vector_dbs.py

diff --git a/.github/workflows/integration-vector-io-tests.yml b/.github/workflows/integration-vector-io-tests.yml
index e9a758873..a6a86b15f 100644
--- a/.github/workflows/integration-vector-io-tests.yml
+++ b/.github/workflows/integration-vector-io-tests.yml
@@ -169,9 +169,7 @@ jobs:
       run: |
         uv run --no-sync \
           pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
-          tests/integration/vector_io \
-          --embedding-model inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
-          --embedding-dimension 768
+          tests/integration/vector_io
     - name: Check Storage and Memory
Available After Tests if: ${{ always() }} diff --git a/docs/docs/building_applications/rag.mdx b/docs/docs/building_applications/rag.mdx index 8307448be..b1681dc62 100644 --- a/docs/docs/building_applications/rag.mdx +++ b/docs/docs/building_applications/rag.mdx @@ -88,18 +88,19 @@ Llama Stack provides OpenAI-compatible RAG capabilities through: To enable automatic vector store creation without specifying embedding models, configure a default embedding model in your run.yaml like so: ```yaml -models: - - model_id: nomic-ai/nomic-embed-text-v1.5 - provider_id: inline::sentence-transformers - metadata: - embedding_dimension: 768 - default_configured: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 ``` With this configuration: -- `client.vector_stores.create()` works without requiring embedding model parameters -- The system automatically uses the default model and its embedding dimension for any newly created vector store -- Only one model can be marked as `default_configured: true` +- `client.vector_stores.create()` works without requiring embedding model or provider parameters +- The system automatically uses the default vector store provider (`faiss`) when multiple providers are available +- The system automatically uses the default embedding model (`sentence-transformers/nomic-ai/nomic-embed-text-v1.5`) for any newly created vector store +- The `default_provider_id` specifies which vector storage backend to use +- The `default_embedding_model` specifies both the inference provider and model for embeddings ## Vector Store Operations @@ -108,14 +109,15 @@ With this configuration: You can create vector stores with automatic or explicit embedding model selection: ```python -# Automatic - uses default configured embedding model +# Automatic - uses default configured embedding model and vector store provider vs = client.vector_stores.create() -# Explicit - specify embedding model when you need a specific one +# Explicit - specify embedding model and/or provider when you need specific ones vs = client.vector_stores.create( extra_body={ - "embedding_model": "nomic-ai/nomic-embed-text-v1.5", - "embedding_dimension": 768 + "provider_id": "faiss", # Optional: specify vector store provider + "embedding_model": "sentence-transformers/nomic-ai/nomic-embed-text-v1.5", + "embedding_dimension": 768 # Optional: will be auto-detected if not provided } ) ``` diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py index 8fbf21f3e..5777f3d04 100644 --- a/llama_stack/apis/datatypes.py +++ b/llama_stack/apis/datatypes.py @@ -121,6 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta): models = "models" shields = "shields" + vector_dbs = "vector_dbs" # only used for routing datasets = "datasets" scoring_functions = "scoring_functions" benchmarks = "benchmarks" diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py index 53bf181e9..0368095cb 100644 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ b/llama_stack/apis/vector_dbs/vector_dbs.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Literal +from typing import Literal, Protocol, runtime_checkable from pydantic import BaseModel @@ -59,3 +59,35 @@ class ListVectorDBsResponse(BaseModel): """ data: list[VectorDB] + + +@runtime_checkable +class VectorDBs(Protocol): + """Internal protocol for vector_dbs routing - no public API endpoints.""" + + async def list_vector_dbs(self) -> ListVectorDBsResponse: + """Internal method to list vector databases.""" + ... + + async def get_vector_db( + self, + vector_db_id: str, + ) -> VectorDB: + """Internal method to get a vector database by ID.""" + ... + + async def register_vector_db( + self, + vector_db_id: str, + embedding_model: str, + embedding_dimension: int | None = 384, + provider_id: str | None = None, + vector_db_name: str | None = None, + provider_vector_db_id: str | None = None, + ) -> VectorDB: + """Internal method to register a vector database.""" + ... + + async def unregister_vector_db(self, vector_db_id: str) -> None: + """Internal method to unregister a vector database.""" + ... diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index d692da3b3..6d06adb84 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -354,6 +354,26 @@ class AuthenticationRequiredError(Exception): pass +class QualifiedModel(BaseModel): + """A qualified model identifier, consisting of a provider ID and a model ID.""" + + provider_id: str + model_id: str + + +class VectorStoresConfig(BaseModel): + """Configuration for vector stores in the stack.""" + + default_provider_id: str | None = Field( + default=None, + description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.", + ) + default_embedding_model: QualifiedModel | None = Field( + default=None, + description="Default embedding model configuration for vector stores.", + ) + + class QuotaPeriod(StrEnum): DAY = "day" @@ -499,6 +519,11 @@ can be instantiated multiple times (with different configs) if necessary. description="Path to directory containing external API implementations. 
The APIs code and dependencies must be installed on the system.", ) + vector_stores: VectorStoresConfig | None = Field( + default=None, + description="Configuration for vector stores, including default embedding model", + ) + @field_validator("external_providers_dir") @classmethod def validate_external_providers_dir(cls, v): diff --git a/llama_stack/core/distribution.py b/llama_stack/core/distribution.py index 0e1f672c3..59461f5d6 100644 --- a/llama_stack/core/distribution.py +++ b/llama_stack/core/distribution.py @@ -63,6 +63,10 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]: routing_table_api=Api.tool_groups, router_api=Api.tool_runtime, ), + AutoRoutedApiInfo( + routing_table_api=Api.vector_dbs, + router_api=Api.vector_io, + ), ] diff --git a/llama_stack/core/resolver.py b/llama_stack/core/resolver.py index acd459f99..6e1843870 100644 --- a/llama_stack/core/resolver.py +++ b/llama_stack/core/resolver.py @@ -29,6 +29,7 @@ from llama_stack.apis.scoring_functions import ScoringFunctions from llama_stack.apis.shields import Shields from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.tools import ToolGroups, ToolRuntime +from llama_stack.apis.vector_dbs import VectorDBs from llama_stack.apis.vector_io import VectorIO from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA from llama_stack.core.client import get_client_impl @@ -81,6 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) -> Api.inspect: Inspect, Api.batches: Batches, Api.vector_io: VectorIO, + Api.vector_dbs: VectorDBs, Api.models: Models, Api.safety: Safety, Api.shields: Shields, diff --git a/llama_stack/core/routers/__init__.py b/llama_stack/core/routers/__init__.py index 0573fc2c7..df4df0463 100644 --- a/llama_stack/core/routers/__init__.py +++ b/llama_stack/core/routers/__init__.py @@ -29,6 +29,7 @@ async def get_routing_table_impl( from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable from ..routing_tables.shields import ShieldsRoutingTable from ..routing_tables.toolgroups import ToolGroupsRoutingTable + from ..routing_tables.vector_dbs import VectorDBsRoutingTable api_to_tables = { "models": ModelsRoutingTable, @@ -37,6 +38,7 @@ async def get_routing_table_impl( "scoring_functions": ScoringFunctionsRoutingTable, "benchmarks": BenchmarksRoutingTable, "tool_groups": ToolGroupsRoutingTable, + "vector_dbs": VectorDBsRoutingTable, } if api.value not in api_to_tables: @@ -91,6 +93,9 @@ async def get_auto_router_impl( await inference_store.initialize() api_to_dep_impl["store"] = inference_store + elif api == Api.vector_io: + api_to_dep_impl["vector_stores_config"] = run_config.vector_stores + impl = api_to_routers[api.value](routing_table, **api_to_dep_impl) await impl.initialize() return impl diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index f4e871a40..bfc5f7164 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -31,6 +31,7 @@ from llama_stack.apis.vector_io import ( VectorStoreObject, VectorStoreSearchResponsePage, ) +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable @@ -43,9 +44,11 @@ class VectorIORouter(VectorIO): def __init__( self, routing_table: RoutingTable, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: logger.debug("Initializing VectorIORouter") self.routing_table = 
routing_table + self.vector_stores_config = vector_stores_config async def initialize(self) -> None: logger.debug("VectorIORouter.initialize") @@ -122,6 +125,17 @@ class VectorIORouter(VectorIO): embedding_dimension = extra.get("embedding_dimension") provider_id = extra.get("provider_id") + # Use default embedding model if not specified + if ( + embedding_model is None + and self.vector_stores_config + and self.vector_stores_config.default_embedding_model is not None + ): + # Construct the full model ID with provider prefix + embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id + model_id = self.vector_stores_config.default_embedding_model.model_id + embedding_model = f"{embedding_provider_id}/{model_id}" + if embedding_model is not None and embedding_dimension is None: embedding_dimension = await self._get_embedding_model_dimension(embedding_model) @@ -132,11 +146,24 @@ class VectorIORouter(VectorIO): raise ValueError("No vector_io providers available") if num_providers > 1: available_providers = list(self.routing_table.impls_by_provider_id.keys()) - raise ValueError( - f"Multiple vector_io providers available. Please specify provider_id in extra_body. " - f"Available providers: {available_providers}" - ) - provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] + # Use default configured provider + if self.vector_stores_config and self.vector_stores_config.default_provider_id: + default_provider = self.vector_stores_config.default_provider_id + if default_provider in available_providers: + provider_id = default_provider + logger.debug(f"Using configured default vector store provider: {provider_id}") + else: + raise ValueError( + f"Configured default vector store provider '{default_provider}' not found. " + f"Available providers: {available_providers}" + ) + else: + raise ValueError( + f"Multiple vector_io providers available. Please specify provider_id in extra_body. 
" + f"Available providers: {available_providers}" + ) + else: + provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] vector_db_id = f"vs_{uuid.uuid4()}" registered_vector_db = await self.routing_table.register_vector_db( @@ -243,8 +270,7 @@ class VectorIORouter(VectorIO): vector_store_id: str, ) -> VectorStoreDeleteResponse: logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_delete_vector_store(vector_store_id) + return await self.routing_table.openai_delete_vector_store(vector_store_id) async def openai_search_vector_store( self, diff --git a/llama_stack/core/routing_tables/common.py b/llama_stack/core/routing_tables/common.py index 8df0a89a9..087483bb6 100644 --- a/llama_stack/core/routing_tables/common.py +++ b/llama_stack/core/routing_tables/common.py @@ -134,12 +134,15 @@ class CommonRoutingTableImpl(RoutingTable): from .scoring_functions import ScoringFunctionsRoutingTable from .shields import ShieldsRoutingTable from .toolgroups import ToolGroupsRoutingTable + from .vector_dbs import VectorDBsRoutingTable def apiname_object(): if isinstance(self, ModelsRoutingTable): return ("Inference", "model") elif isinstance(self, ShieldsRoutingTable): return ("Safety", "shield") + elif isinstance(self, VectorDBsRoutingTable): + return ("VectorIO", "vector_db") elif isinstance(self, DatasetsRoutingTable): return ("DatasetIO", "dataset") elif isinstance(self, ScoringFunctionsRoutingTable): diff --git a/llama_stack/core/routing_tables/vector_dbs.py b/llama_stack/core/routing_tables/vector_dbs.py new file mode 100644 index 000000000..e87fb61c6 --- /dev/null +++ b/llama_stack/core/routing_tables/vector_dbs.py @@ -0,0 +1,323 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import TypeAdapter + +from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError +from llama_stack.apis.models import ModelType +from llama_stack.apis.resource import ResourceType + +# Removed VectorDBs import to avoid exposing public API +from llama_stack.apis.vector_io.vector_io import ( + OpenAICreateVectorStoreRequestWithExtraBody, + SearchRankingOptions, + VectorStoreChunkingStrategy, + VectorStoreDeleteResponse, + VectorStoreFileContentsResponse, + VectorStoreFileDeleteResponse, + VectorStoreFileObject, + VectorStoreFileStatus, + VectorStoreObject, + VectorStoreSearchResponsePage, +) +from llama_stack.core.datatypes import ( + VectorDBWithOwner, +) +from llama_stack.log import get_logger + +from .common import CommonRoutingTableImpl, lookup_model + +logger = get_logger(name=__name__, category="core::routing_tables") + + +class VectorDBsRoutingTable(CommonRoutingTableImpl): + """Internal routing table for vector_db operations. + + Does not inherit from VectorDBs to avoid exposing public API endpoints. + Only provides internal routing functionality for VectorIORouter. 
+ """ + + # Internal methods only - no public API exposure + + async def register_vector_db( + self, + vector_db_id: str, + embedding_model: str, + embedding_dimension: int | None = 384, + provider_id: str | None = None, + provider_vector_db_id: str | None = None, + vector_db_name: str | None = None, + ) -> Any: + if provider_id is None: + if len(self.impls_by_provider_id) > 0: + provider_id = list(self.impls_by_provider_id.keys())[0] + if len(self.impls_by_provider_id) > 1: + logger.warning( + f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}." + ) + else: + raise ValueError("No provider available. Please configure a vector_io provider.") + model = await lookup_model(self, embedding_model) + if model is None: + raise ModelNotFoundError(embedding_model) + if model.model_type != ModelType.embedding: + raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding) + if "embedding_dimension" not in model.metadata: + raise ValueError(f"Model {embedding_model} does not have an embedding dimension") + + try: + provider = self.impls_by_provider_id[provider_id] + except KeyError: + available_providers = list(self.impls_by_provider_id.keys()) + raise ValueError( + f"Provider '{provider_id}' not found in routing table. Available providers: {available_providers}" + ) from None + logger.warning( + "VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly." + ) + request = OpenAICreateVectorStoreRequestWithExtraBody( + name=vector_db_name or vector_db_id, + embedding_model=embedding_model, + embedding_dimension=model.metadata["embedding_dimension"], + provider_id=provider_id, + provider_vector_db_id=provider_vector_db_id, + ) + vector_store = await provider.openai_create_vector_store(request) + + vector_store_id = vector_store.id + actual_provider_vector_db_id = provider_vector_db_id or vector_store_id + logger.warning( + f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. 
Setting VectorDB {vector_db_id} to VectorDB.vector_db_name" + ) + + vector_db_data = { + "identifier": vector_store_id, + "type": ResourceType.vector_db.value, + "provider_id": provider_id, + "provider_resource_id": actual_provider_vector_db_id, + "embedding_model": embedding_model, + "embedding_dimension": model.metadata["embedding_dimension"], + "vector_db_name": vector_store.name, + } + vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data) + await self.register_object(vector_db) + return vector_db + + async def openai_retrieve_vector_store( + self, + vector_store_id: str, + ) -> VectorStoreObject: + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store(vector_store_id) + + async def openai_update_vector_store( + self, + vector_store_id: str, + name: str | None = None, + expires_after: dict[str, Any] | None = None, + metadata: dict[str, Any] | None = None, + ) -> VectorStoreObject: + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store( + vector_store_id=vector_store_id, + name=name, + expires_after=expires_after, + metadata=metadata, + ) + + async def openai_delete_vector_store( + self, + vector_store_id: str, + ) -> VectorStoreDeleteResponse: + await self.assert_action_allowed("delete", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + result = await provider.openai_delete_vector_store(vector_store_id) + await self.unregister_vector_db(vector_store_id) + return result + + async def unregister_vector_db(self, vector_store_id: str) -> None: + """Remove the vector store from the routing table registry.""" + try: + vector_db_obj = await self.get_object_by_identifier("vector_db", vector_store_id) + if vector_db_obj: + await self.unregister_object(vector_db_obj) + except Exception as e: + # Log the error but don't fail the operation + logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}") + + async def openai_search_vector_store( + self, + vector_store_id: str, + query: str | list[str], + filters: dict[str, Any] | None = None, + max_num_results: int | None = 10, + ranking_options: SearchRankingOptions | None = None, + rewrite_query: bool | None = False, + search_mode: str | None = "vector", + ) -> VectorStoreSearchResponsePage: + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_search_vector_store( + vector_store_id=vector_store_id, + query=query, + filters=filters, + max_num_results=max_num_results, + ranking_options=ranking_options, + rewrite_query=rewrite_query, + search_mode=search_mode, + ) + + async def openai_attach_file_to_vector_store( + self, + vector_store_id: str, + file_id: str, + attributes: dict[str, Any] | None = None, + chunking_strategy: VectorStoreChunkingStrategy | None = None, + ) -> VectorStoreFileObject: + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_attach_file_to_vector_store( + vector_store_id=vector_store_id, + file_id=file_id, + attributes=attributes, + chunking_strategy=chunking_strategy, + ) + + async def openai_list_files_in_vector_store( + self, + vector_store_id: str, + limit: 
int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + filter: VectorStoreFileStatus | None = None, + ) -> list[VectorStoreFileObject]: + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store( + vector_store_id=vector_store_id, + limit=limit, + order=order, + after=after, + before=before, + filter=filter, + ) + + async def openai_retrieve_vector_store_file( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileObject: + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def openai_retrieve_vector_store_file_contents( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileContentsResponse: + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_contents( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def openai_update_vector_store_file( + self, + vector_store_id: str, + file_id: str, + attributes: dict[str, Any], + ) -> VectorStoreFileObject: + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + attributes=attributes, + ) + + async def openai_delete_vector_store_file( + self, + vector_store_id: str, + file_id: str, + ) -> VectorStoreFileDeleteResponse: + await self.assert_action_allowed("delete", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_delete_vector_store_file( + vector_store_id=vector_store_id, + file_id=file_id, + ) + + async def openai_create_vector_store_file_batch( + self, + vector_store_id: str, + file_ids: list[str], + attributes: dict[str, Any] | None = None, + chunking_strategy: Any | None = None, + ): + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_create_vector_store_file_batch( + vector_store_id=vector_store_id, + file_ids=file_ids, + attributes=attributes, + chunking_strategy=chunking_strategy, + ) + + async def openai_retrieve_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ): + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + ) + + async def openai_list_files_in_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + after: str | None = None, + before: str | None = None, + filter: str | None = None, + limit: int | None = 20, + order: str | None = "desc", + ): + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_list_files_in_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + after=after, + before=before, + filter=filter, + limit=limit, + 
order=order, + ) + + async def openai_cancel_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ): + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_cancel_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + ) diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index 15d0198b1..a2f7babd2 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -35,7 +35,7 @@ from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl -from llama_stack.core.datatypes import Provider, StackRunConfig +from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl @@ -108,30 +108,6 @@ REGISTRY_REFRESH_TASK = None TEST_RECORDING_CONTEXT = None -async def validate_default_embedding_model(impls: dict[Api, Any]): - """Validate that at most one embedding model is marked as default.""" - if Api.models not in impls: - return - - models_impl = impls[Api.models] - response = await models_impl.list_models() - models_list = response.data if hasattr(response, "data") else response - - default_embedding_models = [] - for model in models_list: - if model.model_type == "embedding" and model.metadata.get("default_configured") is True: - default_embedding_models.append(model.identifier) - - if len(default_embedding_models) > 1: - raise ValueError( - f"Multiple embedding models marked as default_configured=True: {default_embedding_models}. " - "Only one embedding model can be marked as default." - ) - - if default_embedding_models: - logger.info(f"Default embedding model configured: {default_embedding_models[0]}") - - async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): for rsrc, api, register_method, list_method in RESOURCES: objects = getattr(run_config, rsrc) @@ -162,7 +138,41 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}", ) - await validate_default_embedding_model(impls) + +async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig | None, impls: dict[Api, Any]): + """Validate vector stores configuration.""" + if vector_stores_config is None: + return + + default_embedding_model = vector_stores_config.default_embedding_model + if default_embedding_model is None: + return + + provider_id = default_embedding_model.provider_id + model_id = default_embedding_model.model_id + default_model_id = f"{provider_id}/{model_id}" + + if Api.models not in impls: + raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'") + + models_impl = impls[Api.models] + response = await models_impl.list_models() + models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"} + + default_model = models_list.get(default_model_id) + if default_model is None: + raise ValueError(f"Embedding model '{default_model_id}' not found. 
Available embedding models: {models_list}") + + embedding_dimension = default_model.metadata.get("embedding_dimension") + if embedding_dimension is None: + raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") + + try: + int(embedding_dimension) + except ValueError as err: + raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err + + logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") class EnvVarError(Exception): @@ -400,8 +410,8 @@ class Stack: await impls[Api.conversations].initialize() await register_resources(self.run_config, impls) - await refresh_registry_once(impls) + await validate_vector_stores_config(self.run_config.vector_stores, impls) self.impls = impls def create_registry_refresh_task(self): diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml index 191d0ae59..c01e415a9 100644 --- a/llama_stack/distributions/ci-tests/build.yaml +++ b/llama_stack/distributions/ci-tests/build.yaml @@ -25,6 +25,8 @@ distribution_spec: - provider_type: inline::milvus - provider_type: remote::chromadb - provider_type: remote::pgvector + - provider_type: remote::qdrant + - provider_type: remote::weaviate files: - provider_type: inline::localfs safety: diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index f9e741474..1653dc9bd 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -128,6 +128,21 @@ providers: persistence: namespace: vector_io::pgvector backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs @@ -253,3 +268,8 @@ server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter-gpu/build.yaml b/llama_stack/distributions/starter-gpu/build.yaml index 943c6134d..b2e2a0c85 100644 --- a/llama_stack/distributions/starter-gpu/build.yaml +++ b/llama_stack/distributions/starter-gpu/build.yaml @@ -26,6 +26,8 @@ distribution_spec: - provider_type: inline::milvus - provider_type: remote::chromadb - provider_type: remote::pgvector + - provider_type: remote::qdrant + - provider_type: remote::weaviate files: - provider_type: inline::localfs safety: diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml index abfa579a7..81f564779 100644 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ b/llama_stack/distributions/starter-gpu/run.yaml @@ -128,6 +128,21 @@ providers: persistence: namespace: vector_io::pgvector backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: 
${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs @@ -256,3 +271,8 @@ server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml index c2719d50d..baa80ef3e 100644 --- a/llama_stack/distributions/starter/build.yaml +++ b/llama_stack/distributions/starter/build.yaml @@ -26,6 +26,8 @@ distribution_spec: - provider_type: inline::milvus - provider_type: remote::chromadb - provider_type: remote::pgvector + - provider_type: remote::qdrant + - provider_type: remote::weaviate files: - provider_type: inline::localfs safety: diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml index fc58a4afe..dc611a446 100644 --- a/llama_stack/distributions/starter/run.yaml +++ b/llama_stack/distributions/starter/run.yaml @@ -128,6 +128,21 @@ providers: persistence: namespace: vector_io::pgvector backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default files: - provider_id: meta-reference-files provider_type: inline::localfs @@ -253,3 +268,8 @@ server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py index f87ebcc5f..c8c7101a6 100644 --- a/llama_stack/distributions/starter/starter.py +++ b/llama_stack/distributions/starter/starter.py @@ -11,8 +11,10 @@ from llama_stack.core.datatypes import ( BuildProvider, Provider, ProviderSpec, + QualifiedModel, ShieldInput, ToolGroupInput, + VectorStoresConfig, ) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings @@ -31,6 +33,8 @@ from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOC from llama_stack.providers.remote.vector_io.pgvector.config import ( PGVectorVectorIOConfig, ) +from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig +from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig @@ -113,6 +117,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: BuildProvider(provider_type="inline::milvus"), BuildProvider(provider_type="remote::chromadb"), BuildProvider(provider_type="remote::pgvector"), + BuildProvider(provider_type="remote::qdrant"), + BuildProvider(provider_type="remote::weaviate"), ], "files": [BuildProvider(provider_type="inline::localfs")], "safety": [ @@ -221,12 
+227,35 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: password="${env.PGVECTOR_PASSWORD:=}", ), ), + Provider( + provider_id="${env.QDRANT_URL:+qdrant}", + provider_type="remote::qdrant", + config=QdrantVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + url="${env.QDRANT_URL:=}", + ), + ), + Provider( + provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}", + provider_type="remote::weaviate", + config=WeaviateVectorIOConfig.sample_run_config( + f"~/.llama/distributions/{name}", + cluster_url="${env.WEAVIATE_CLUSTER_URL:=}", + ), + ), ], "files": [files_provider], }, default_models=[], default_tool_groups=default_tool_groups, default_shields=default_shields, + vector_stores_config=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=QualifiedModel( + provider_id="sentence-transformers", + model_id="nomic-ai/nomic-embed-text-v1.5", + ), + ), ), }, run_config_env_vars={ diff --git a/llama_stack/distributions/template.py b/llama_stack/distributions/template.py index 542c7bea9..daa609388 100644 --- a/llama_stack/distributions/template.py +++ b/llama_stack/distributions/template.py @@ -27,6 +27,7 @@ from llama_stack.core.datatypes import ( ShieldInput, TelemetryConfig, ToolGroupInput, + VectorStoresConfig, ) from llama_stack.core.distribution import get_provider_registry from llama_stack.core.storage.datatypes import ( @@ -186,6 +187,7 @@ class RunConfigSettings(BaseModel): default_tool_groups: list[ToolGroupInput] | None = None default_datasets: list[DatasetInput] | None = None default_benchmarks: list[BenchmarkInput] | None = None + vector_stores_config: VectorStoresConfig | None = None telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True)) storage_backends: dict[str, Any] | None = None storage_stores: dict[str, Any] | None = None @@ -263,7 +265,7 @@ class RunConfigSettings(BaseModel): ) # Return a dict that matches StackRunConfig structure - return { + config = { "version": LLAMA_STACK_RUN_CONFIG_VERSION, "image_name": name, "container_image": container_image, @@ -283,6 +285,11 @@ class RunConfigSettings(BaseModel): "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None, } + if self.vector_stores_config: + config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True) + + return config + class DistributionTemplate(BaseModel): """ diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py index 871adcb24..cb72aa13a 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +++ b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py @@ -59,7 +59,6 @@ class SentenceTransformersInferenceImpl( provider_id=self.__provider_id__, metadata={ "embedding_dimension": 768, - "default_configured": True, }, model_type=ModelType.embedding, ), diff --git a/llama_stack/providers/inline/vector_io/chroma/__init__.py b/llama_stack/providers/inline/vector_io/chroma/__init__.py index 09e869c90..575e5ad88 100644 --- a/llama_stack/providers/inline/vector_io/chroma/__init__.py +++ b/llama_stack/providers/inline/vector_io/chroma/__init__.py @@ -12,15 +12,8 @@ from .config import ChromaVectorIOConfig async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]): - from llama_stack.providers.remote.vector_io.chroma.chroma import ( - 
ChromaVectorIOAdapter, - ) + from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaVectorIOAdapter - impl = ChromaVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/faiss/__init__.py b/llama_stack/providers/inline/vector_io/faiss/__init__.py index c0f01bc9d..24d1f292a 100644 --- a/llama_stack/providers/inline/vector_io/faiss/__init__.py +++ b/llama_stack/providers/inline/vector_io/faiss/__init__.py @@ -16,11 +16,6 @@ async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]): assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = FaissVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index ff1a6aa4c..f13eb3e96 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -17,27 +17,14 @@ from numpy.typing import NDArray from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( - HealthResponse, - HealthStatus, - VectorDBsProtocolPrivate, -) +from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorDBsProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - EmbeddingIndex, - VectorDBWithIndex, -) +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex from .config import FaissVectorIOConfig @@ -155,12 +142,7 @@ class FaissIndex(EmbeddingIndex): await self._save_index() - async def query_vector( - self, - embedding: NDArray, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k) chunks = [] scores = [] @@ -175,12 +157,7 @@ class FaissIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: raise NotImplementedError( "Keyword search is not supported - underlying DB FAISS does not support this search mode" ) @@ -200,17 +177,10 @@ class FaissIndex(EmbeddingIndex): class 
FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): - def __init__( - self, - config: FaissVectorIOConfig, - inference_api: Inference, - models_api: Models, - files_api: Files | None, - ) -> None: + def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.models_api = models_api self.cache: dict[str, VectorDBWithIndex] = {} async def initialize(self) -> None: @@ -252,17 +222,11 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr except Exception as e: return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_db(self, vector_db: VectorDB) -> None: assert self.kvstore is not None key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" - await self.kvstore.set( - key=key, - value=vector_db.model_dump_json(), - ) + await self.kvstore.set(key=key, value=vector_db.model_dump_json()) # Store in cache self.cache[vector_db.identifier] = VectorDBWithIndex( @@ -285,12 +249,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr del self.cache[vector_db_id] await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}") - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = self.cache.get(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}") @@ -298,10 +257,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = self.cache.get(vector_db_id) if index is None: diff --git a/llama_stack/providers/inline/vector_io/milvus/__init__.py b/llama_stack/providers/inline/vector_io/milvus/__init__.py index 46a006a91..7dc9c6a33 100644 --- a/llama_stack/providers/inline/vector_io/milvus/__init__.py +++ b/llama_stack/providers/inline/vector_io/milvus/__init__.py @@ -14,11 +14,6 @@ from .config import MilvusVectorIOConfig async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]): from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter - impl = MilvusVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/llama_stack/providers/inline/vector_io/qdrant/__init__.py index 2863f667c..bef6d50e6 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/__init__.py +++ b/llama_stack/providers/inline/vector_io/qdrant/__init__.py @@ -15,11 +15,6 @@ async def get_provider_impl(config: QdrantVectorIOConfig, deps: dict[Api, Any]): from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter assert isinstance(config, QdrantVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = 
QdrantVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py index 93921fb23..df96e927c 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py @@ -15,11 +15,6 @@ async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]): from .sqlite_vec import SQLiteVecVectorIOAdapter assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = SQLiteVecVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index a58aa05b8..cfe23bde5 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -17,13 +17,8 @@ from numpy.typing import NDArray from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl @@ -175,32 +170,18 @@ class SQLiteVecIndex(EmbeddingIndex): # Insert vector embeddings embedding_data = [ - ( - ( - chunk.chunk_id, - serialize_vector(emb.tolist()), - ) - ) + ((chunk.chunk_id, serialize_vector(emb.tolist()))) for chunk, emb in zip(batch_chunks, batch_embeddings, strict=True) ] - cur.executemany( - f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);", - embedding_data, - ) + cur.executemany(f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);", embedding_data) # Insert FTS content fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks] # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT) - cur.executemany( - f"DELETE FROM [{self.fts_table}] WHERE id = ?;", - [(row[0],) for row in fts_data], - ) + cur.executemany(f"DELETE FROM [{self.fts_table}] WHERE id = ?;", [(row[0],) for row in fts_data]) # INSERT new entries - cur.executemany( - f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);", - fts_data, - ) + cur.executemany(f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);", fts_data) connection.commit() @@ -216,12 +197,7 @@ class SQLiteVecIndex(EmbeddingIndex): # Run batch insertion in a background thread await asyncio.to_thread(_execute_all_batch_inserts) - async def query_vector( - self, - embedding: NDArray, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: """ Performs vector-based search using a virtual table for vector similarity. 
""" @@ -261,12 +237,7 @@ class SQLiteVecIndex(EmbeddingIndex): scores.append(score) return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Performs keyword-based search using SQLite FTS5 for relevance-ranked full-text search. """ @@ -410,17 +381,10 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex). """ - def __init__( - self, - config, - inference_api: Inference, - models_api: Models, - files_api: Files | None, - ) -> None: + def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.models_api = models_api self.cache: dict[str, VectorDBWithIndex] = {} self.vector_db_store = None @@ -433,9 +397,7 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc for db_json in stored_vector_dbs: vector_db = VectorDB.model_validate_json(db_json) index = await SQLiteVecIndex.create( - vector_db.embedding_dimension, - self.config.db_path, - vector_db.identifier, + vector_db.embedding_dimension, self.config.db_path, vector_db.identifier ) self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) @@ -450,11 +412,7 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc return [v.vector_db for v in self.cache.values()] async def register_vector_db(self, vector_db: VectorDB) -> None: - index = await SQLiteVecIndex.create( - vector_db.embedding_dimension, - self.config.db_path, - vector_db.identifier, - ) + index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier) self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: diff --git a/llama_stack/providers/remote/vector_io/chroma/__init__.py b/llama_stack/providers/remote/vector_io/chroma/__init__.py index a6db48c43..e4b77c68d 100644 --- a/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ b/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -12,11 +12,6 @@ from .config import ChromaVectorIOConfig async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]): from .chroma import ChromaVectorIOAdapter - impl = ChromaVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index b07207cc6..0aa728c32 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -12,24 +12,16 @@ import chromadb from numpy.typing import NDArray from llama_stack.apis.files import Files -from llama_stack.apis.inference import InterleavedContent +from llama_stack.apis.inference import Inference, InterleavedContent from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - 
VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - EmbeddingIndex, - VectorDBWithIndex, -) +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig @@ -68,19 +60,13 @@ class ChromaIndex(EmbeddingIndex): ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks] await maybe_await( - self.collection.add( - documents=[chunk.model_dump_json() for chunk in chunks], - embeddings=embeddings, - ids=ids, - ) + self.collection.add(documents=[chunk.model_dump_json() for chunk in chunks], embeddings=embeddings, ids=ids) ) async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: results = await maybe_await( self.collection.query( - query_embeddings=[embedding.tolist()], - n_results=k, - include=["documents", "distances"], + query_embeddings=[embedding.tolist()], n_results=k, include=["documents", "distances"] ) ) distances = results["distances"][0] @@ -108,12 +94,7 @@ class ChromaIndex(EmbeddingIndex): async def delete(self): await maybe_await(self.client.delete_collection(self.collection.name)) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: raise NotImplementedError("Keyword search is not supported in Chroma") async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None: @@ -137,15 +118,13 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP def __init__( self, config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig, - inference_api: Api.inference, - models_apis: Api.models, + inference_api: Inference, files_api: Files | None, ) -> None: super().__init__(files_api=files_api, kvstore=None) log.info(f"Initializing ChromaVectorIOAdapter with url: {config}") self.config = config self.inference_api = inference_api - self.models_api = models_apis self.client = None self.cache = {} self.vector_db_store = None @@ -172,14 +151,10 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_db(self, vector_db: VectorDB) -> None: collection = await maybe_await( self.client.get_or_create_collection( - name=vector_db.identifier, - metadata={"vector_db": vector_db.model_dump_json()}, + name=vector_db.identifier, metadata={"vector_db": vector_db.model_dump_json()} ) ) self.cache[vector_db.identifier] = VectorDBWithIndex( @@ -194,12 +169,7 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP await self.cache[vector_db_id].index.delete() del 
self.cache[vector_db_id] - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") @@ -207,10 +177,7 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) diff --git a/llama_stack/providers/remote/vector_io/milvus/__init__.py b/llama_stack/providers/remote/vector_io/milvus/__init__.py index dc5a642d6..526075bb2 100644 --- a/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -13,12 +13,6 @@ async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, Provide from .milvus import MilvusVectorIOAdapter assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}" - - impl = MilvusVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index 1f689d1a9..d7c34163d 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -14,13 +14,8 @@ from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusC from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig @@ -74,46 +69,23 @@ class MilvusIndex(EmbeddingIndex): logger.info(f"Creating new collection {self.collection_name} with nullable sparse field") # Create schema for vector search schema = self.client.create_schema() - schema.add_field( - field_name="chunk_id", - datatype=DataType.VARCHAR, - is_primary=True, - max_length=100, - ) + schema.add_field(field_name="chunk_id", datatype=DataType.VARCHAR, is_primary=True, max_length=100) schema.add_field( field_name="content", datatype=DataType.VARCHAR, max_length=65535, enable_analyzer=True, # Enable text analysis for BM25 ) - schema.add_field( - field_name="vector", - datatype=DataType.FLOAT_VECTOR, - dim=len(embeddings[0]), - ) - schema.add_field( - field_name="chunk_content", - datatype=DataType.JSON, - ) + schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(embeddings[0])) + schema.add_field(field_name="chunk_content", 
datatype=DataType.JSON) # Add sparse vector field for BM25 (required by the function) - schema.add_field( - field_name="sparse", - datatype=DataType.SPARSE_FLOAT_VECTOR, - ) + schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR) # Create indexes index_params = self.client.prepare_index_params() - index_params.add_index( - field_name="vector", - index_type="FLAT", - metric_type="COSINE", - ) + index_params.add_index(field_name="vector", index_type="FLAT", metric_type="COSINE") # Add index for sparse field (required by BM25 function) - index_params.add_index( - field_name="sparse", - index_type="SPARSE_INVERTED_INDEX", - metric_type="BM25", - ) + index_params.add_index(field_name="sparse", index_type="SPARSE_INVERTED_INDEX", metric_type="BM25") # Add BM25 function for full-text search bm25_function = Function( @@ -144,11 +116,7 @@ class MilvusIndex(EmbeddingIndex): } ) try: - await asyncio.to_thread( - self.client.insert, - self.collection_name, - data=data, - ) + await asyncio.to_thread(self.client.insert, self.collection_name, data=data) except Exception as e: logger.error(f"Error inserting chunks into Milvus collection {self.collection_name}: {e}") raise e @@ -167,12 +135,7 @@ class MilvusIndex(EmbeddingIndex): scores = [res["distance"] for res in search_res[0]] return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Perform BM25-based keyword search using Milvus's built-in full-text search. """ @@ -210,12 +173,7 @@ class MilvusIndex(EmbeddingIndex): # Fallback to simple text search return await self._fallback_keyword_search(query_string, k, score_threshold) - async def _fallback_keyword_search( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def _fallback_keyword_search(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Fallback to simple text search when BM25 search is not available. 
""" @@ -308,7 +266,6 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self, config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig, inference_api: Inference, - models_api: Models, files_api: Files | None, ) -> None: super().__init__(files_api=files_api, kvstore=None) @@ -316,7 +273,6 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.cache = {} self.client = None self.inference_api = inference_api - self.models_api = models_api self.vector_db_store = None self.metadata_collection_name = "openai_vector_stores_metadata" @@ -355,10 +311,7 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_db(self, vector_db: VectorDB) -> None: if isinstance(self.config, RemoteMilvusVectorIOConfig): consistency_level = self.config.consistency_level else: @@ -395,12 +348,7 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP await self.cache[vector_db_id].index.delete() del self.cache[vector_db_id] - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -408,10 +356,7 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: diff --git a/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/llama_stack/providers/remote/vector_io/pgvector/__init__.py index bb4079ab5..8086b7650 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -12,6 +12,6 @@ from .config import PGVectorVectorIOConfig async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]): from .pgvector import PGVectorVectorIOAdapter - impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps[Api.models], deps.get(Api.files, None)) + impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 691cf965c..703a47843 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -16,26 +16,15 @@ from pydantic import BaseModel, TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, -) +from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO 
from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
-from llama_stack.providers.utils.memory.vector_store import (
-    ChunkForDeletion,
-    EmbeddingIndex,
-    VectorDBWithIndex,
-)
+from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name

 from .config import PGVectorVectorIOConfig
@@ -205,12 +194,7 @@ class PGVectorIndex(EmbeddingIndex):

         return QueryChunksResponse(chunks=chunks, scores=scores)

-    async def query_keyword(
-        self,
-        query_string: str,
-        k: int,
-        score_threshold: float,
-    ) -> QueryChunksResponse:
+    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
         """
         Performs keyword-based search using PostgreSQL's full-text search with ts_rank scoring.

@@ -317,7 +301,7 @@ class PGVectorIndex(EmbeddingIndex):
         """Remove a chunk from the PostgreSQL table."""
         chunk_ids = [c.chunk_id for c in chunks_for_deletion]
         with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-            cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids,))
+            cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids,))

     def get_pgvector_search_function(self) -> str:
         return self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION[self.distance_metric]
@@ -341,16 +325,11 @@ class PGVectorIndex(EmbeddingIndex):

 class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
     def __init__(
-        self,
-        config: PGVectorVectorIOConfig,
-        inference_api: Inference,
-        models_api: Models,
-        files_api: Files | None = None,
+        self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
-        self.models_api = models_api
         self.conn = None
         self.cache = {}
         self.vector_db_store = None
@@ -407,11 +386,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
             vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore
         )
         await pgvector_index.initialize()
-        index = VectorDBWithIndex(
-            vector_db,
-            index=pgvector_index,
-            inference_api=self.inference_api,
-        )
+        index = VectorDBWithIndex(vector_db, index=pgvector_index, inference_api=self.inference_api)
         self.cache[vector_db.identifier] = index

     async def unregister_vector_db(self, vector_db_id: str) -> None:
@@ -424,20 +399,12 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
         assert self.kvstore is not None
         await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_db_id}")

-    async def insert_chunks(
-        self,
-        vector_db_id: str,
-        chunks: list[Chunk],
-        ttl_seconds: int | None = None,
-    ) -> None:
+    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
         index = await self._get_and_cache_vector_db_index(vector_db_id)

         await index.insert_chunks(chunks)
async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) return await index.query_chunks(query, params) diff --git a/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/llama_stack/providers/remote/vector_io/qdrant/__init__.py index c4942fbce..e9527f101 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -12,11 +12,6 @@ from .config import QdrantVectorIOConfig async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]): from .qdrant import QdrantVectorIOAdapter - impl = QdrantVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index eba8333e4..6838d69e9 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -16,7 +16,6 @@ from qdrant_client.models import PointStruct from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -30,11 +29,7 @@ from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - EmbeddingIndex, - VectorDBWithIndex, -) +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig @@ -99,8 +94,7 @@ class QdrantIndex(EmbeddingIndex): chunk_ids = [convert_id(c.chunk_id) for c in chunks_for_deletion] try: await self.client.delete( - collection_name=self.collection_name, - points_selector=models.PointIdsList(points=chunk_ids), + collection_name=self.collection_name, points_selector=models.PointIdsList(points=chunk_ids) ) except Exception as e: log.error(f"Error deleting chunks from Qdrant collection {self.collection_name}: {e}") @@ -133,12 +127,7 @@ class QdrantIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: raise NotImplementedError("Keyword search is not supported in Qdrant") async def query_hybrid( @@ -161,7 +150,6 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self, config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig, inference_api: Inference, - models_api: Models, files_api: Files | None = None, ) -> None: 
super().__init__(files_api=files_api, kvstore=None) @@ -169,7 +157,6 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api - self.models_api = models_api self.vector_db_store = None self._qdrant_lock = asyncio.Lock() @@ -184,11 +171,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP for vector_db_data in stored_vector_dbs: vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex( - vector_db, - QdrantIndex(self.client, vector_db.identifier), - self.inference_api, - ) + index = VectorDBWithIndex(vector_db, QdrantIndex(self.client, vector_db.identifier), self.inference_api) self.cache[vector_db.identifier] = index self.openai_vector_stores = await self._load_openai_vector_stores() @@ -197,18 +180,13 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_db(self, vector_db: VectorDB) -> None: assert self.kvstore is not None key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" await self.kvstore.set(key=key, value=vector_db.model_dump_json()) index = VectorDBWithIndex( - vector_db=vector_db, - index=QdrantIndex(self.client, vector_db.identifier), - inference_api=self.inference_api, + vector_db=vector_db, index=QdrantIndex(self.client, vector_db.identifier), inference_api=self.inference_api ) self.cache[vector_db.identifier] = index @@ -240,12 +218,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.cache[vector_db_id] = index return index - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - ttl_seconds: int | None = None, - ) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -253,10 +226,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: diff --git a/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/llama_stack/providers/remote/vector_io/weaviate/__init__.py index 2040dad96..12e11d013 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -12,11 +12,6 @@ from .config import WeaviateVectorIOConfig async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]): from .weaviate import WeaviateVectorIOAdapter - impl = WeaviateVectorIOAdapter( - config, - deps[Api.inference], - deps[Api.models], - deps.get(Api.files), - ) + impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/weaviate/config.py b/llama_stack/providers/remote/vector_io/weaviate/config.py index 06242c6b4..66dbf1fed 100644 --- 
a/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -21,11 +21,7 @@ class WeaviateVectorIOConfig(BaseModel): ) @classmethod - def sample_run_config( - cls, - __distro_dir__: str, - **kwargs: Any, - ) -> dict[str, Any]: + def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { "weaviate_api_key": None, "weaviate_cluster_url": "${env.WEAVIATE_CLUSTER_URL:=localhost:8080}", diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 06ffc8706..8e7eb7267 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -16,7 +16,6 @@ from llama_stack.apis.common.content_types import InterleavedContent from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference -from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.core.request_headers import NeedsRequestProviderData @@ -24,9 +23,7 @@ from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.memory.openai_vector_store_mixin import ( - OpenAIVectorStoreMixin, -) +from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, ChunkForDeletion, @@ -48,12 +45,7 @@ OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_conten class WeaviateIndex(EmbeddingIndex): - def __init__( - self, - client: weaviate.WeaviateClient, - collection_name: str, - kvstore: KVStore | None = None, - ): + def __init__(self, client: weaviate.WeaviateClient, collection_name: str, kvstore: KVStore | None = None): self.client = client self.collection_name = sanitize_collection_name(collection_name, weaviate_format=True) self.kvstore = kvstore @@ -108,9 +100,7 @@ class WeaviateIndex(EmbeddingIndex): try: results = collection.query.near_vector( - near_vector=embedding.tolist(), - limit=k, - return_metadata=wvc.query.MetadataQuery(distance=True), + near_vector=embedding.tolist(), limit=k, return_metadata=wvc.query.MetadataQuery(distance=True) ) except Exception as e: log.error(f"Weaviate client vector search failed: {e}") @@ -153,12 +143,7 @@ class WeaviateIndex(EmbeddingIndex): collection = self.client.collections.get(sanitized_collection_name) collection.data.delete_many(where=Filter.by_property("id").contains_any(chunk_ids)) - async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, - ) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: """ Performs BM25-based keyword search using Weaviate's built-in full-text search. 
Args: @@ -175,9 +160,7 @@ class WeaviateIndex(EmbeddingIndex): # Perform BM25 keyword search on chunk_content field try: results = collection.query.bm25( - query=query_string, - limit=k, - return_metadata=wvc.query.MetadataQuery(score=True), + query=query_string, limit=k, return_metadata=wvc.query.MetadataQuery(score=True) ) except Exception as e: log.error(f"Weaviate client keyword search failed: {e}") @@ -274,23 +257,11 @@ class WeaviateIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) -class WeaviateVectorIOAdapter( - OpenAIVectorStoreMixin, - VectorIO, - NeedsRequestProviderData, - VectorDBsProtocolPrivate, -): - def __init__( - self, - config: WeaviateVectorIOConfig, - inference_api: Inference, - models_api: Models, - files_api: Files | None, - ) -> None: +class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorDBsProtocolPrivate): + def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.models_api = models_api self.client_cache = {} self.cache = {} self.vector_db_store = None @@ -301,10 +272,7 @@ class WeaviateVectorIOAdapter( log.info("Using Weaviate locally in container") host, port = self.config.weaviate_cluster_url.split(":") key = "local_test" - client = weaviate.connect_to_local( - host=host, - port=port, - ) + client = weaviate.connect_to_local(host=host, port=port) else: log.info("Using Weaviate remote cluster with URL") key = f"{self.config.weaviate_cluster_url}::{self.config.weaviate_api_key}" @@ -334,15 +302,9 @@ class WeaviateVectorIOAdapter( for raw in stored: vector_db = VectorDB.model_validate_json(raw) client = self._get_client() - idx = WeaviateIndex( - client=client, - collection_name=vector_db.identifier, - kvstore=self.kvstore, - ) + idx = WeaviateIndex(client=client, collection_name=vector_db.identifier, kvstore=self.kvstore) self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db=vector_db, - index=idx, - inference_api=self.inference_api, + vector_db=vector_db, index=idx, inference_api=self.inference_api ) # Load OpenAI vector stores metadata into cache @@ -354,10 +316,7 @@ class WeaviateVectorIOAdapter( # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db( - self, - vector_db: VectorDB, - ) -> None: + async def register_vector_db(self, vector_db: VectorDB) -> None: client = self._get_client() sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True) # Create collection if it doesn't exist @@ -366,17 +325,12 @@ class WeaviateVectorIOAdapter( name=sanitized_collection_name, vectorizer_config=wvc.config.Configure.Vectorizer.none(), properties=[ - wvc.config.Property( - name="chunk_content", - data_type=wvc.config.DataType.TEXT, - ), + wvc.config.Property(name="chunk_content", data_type=wvc.config.DataType.TEXT), ], ) self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db, - WeaviateIndex(client=client, collection_name=sanitized_collection_name), - self.inference_api, + vector_db, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api ) async def unregister_vector_db(self, vector_db_id: str) -> None: @@ -412,12 +366,7 @@ class WeaviateVectorIOAdapter( self.cache[vector_db_id] = index return index - async def insert_chunks( - self, - vector_db_id: str, - chunks: list[Chunk], - 
ttl_seconds: int | None = None, - ) -> None: + async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -425,10 +374,7 @@ class WeaviateVectorIOAdapter( await index.insert_chunks(chunks) async def query_chunks( - self, - vector_db_id: str, - query: InterleavedContent, - params: dict[str, Any] | None = None, + self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: index = await self._get_and_cache_vector_db_index(vector_db_id) if not index: diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 0e550434e..7806d98c1 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -17,7 +17,6 @@ from pydantic import TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files, OpenAIFileObject -from llama_stack.apis.models import Model, Models from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -81,13 +80,14 @@ class OpenAIVectorStoreMixin(ABC): # Implementing classes should call super().__init__() in their __init__ method # to properly initialize the mixin attributes. def __init__( - self, files_api: Files | None = None, kvstore: KVStore | None = None, models_api: Models | None = None + self, + files_api: Files | None = None, + kvstore: KVStore | None = None, ): self.openai_vector_stores: dict[str, dict[str, Any]] = {} self.openai_file_batches: dict[str, dict[str, Any]] = {} self.files_api = files_api self.kvstore = kvstore - self.models_api = models_api self._last_file_batch_cleanup_time = 0 self._file_batch_tasks: dict[str, asyncio.Task[None]] = {} @@ -393,21 +393,7 @@ class OpenAIVectorStoreMixin(ABC): vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") if embedding_model is None: - result = await self._get_default_embedding_model_and_dimension() - if result is None: - raise ValueError( - "embedding_model is required in extra_body when creating a vector store. " - "No default embedding model could be determined automatically." - ) - embedding_model, embedding_dimension = result - elif embedding_dimension is None: - # Embedding model was provided but dimension wasn't, look it up - embedding_dimension = await self._get_embedding_dimension_for_model(embedding_model) - if embedding_dimension is None: - raise ValueError( - f"Could not determine embedding dimension for model '{embedding_model}'. " - "Please provide embedding_dimension in extra_body or ensure the model metadata contains embedding_dimension." 
- ) + raise ValueError("embedding_model is required") if embedding_dimension is None: raise ValueError("Embedding dimension is required") @@ -474,85 +460,6 @@ class OpenAIVectorStoreMixin(ABC): store_info = self.openai_vector_stores[vector_db_id] return VectorStoreObject.model_validate(store_info) - async def _get_embedding_models(self) -> list[Model]: - """Get list of embedding models from the models API.""" - if not self.models_api: - return [] - - models_response = await self.models_api.list_models() - models_list = models_response.data if hasattr(models_response, "data") else models_response - - embedding_models = [] - for model in models_list: - if not isinstance(model, Model): - logger.warning(f"Non-Model object found in models list: {type(model)} - {model}") - continue - if model.model_type == "embedding": - embedding_models.append(model) - - return embedding_models - - async def _get_embedding_dimension_for_model(self, model_id: str) -> int | None: - """Get embedding dimension for a specific model by looking it up in the models API. - - Args: - model_id: The identifier of the embedding model (supports both prefixed and non-prefixed) - - Returns: - The embedding dimension for the model, or None if not found - """ - embedding_models = await self._get_embedding_models() - - for model in embedding_models: - # Check for exact match first - if model.identifier == model_id: - embedding_dimension = model.metadata.get("embedding_dimension") - if embedding_dimension is not None: - return int(embedding_dimension) - else: - logger.warning(f"Model {model_id} found but has no embedding_dimension in metadata") - return None - - # Check for prefixed/unprefixed variations - # If model_id is unprefixed, check if it matches the resource_id - if model.provider_resource_id == model_id: - embedding_dimension = model.metadata.get("embedding_dimension") - if embedding_dimension is not None: - return int(embedding_dimension) - - return None - - async def _get_default_embedding_model_and_dimension(self) -> tuple[str, int] | None: - """Get default embedding model from the models API. - - Looks for embedding models marked with default_configured=True in metadata. - Returns None if no default embedding model is found. - Raises ValueError if multiple defaults are found. - """ - embedding_models = await self._get_embedding_models() - - default_models = [] - for model in embedding_models: - if model.metadata.get("default_configured") is True: - default_models.append(model.identifier) - - if len(default_models) > 1: - raise ValueError( - f"Multiple embedding models marked as default_configured=True: {default_models}. " - "Only one embedding model can be marked as default." 
- ) - - if default_models: - model_id = default_models[0] - embedding_dimension = await self._get_embedding_dimension_for_model(model_id) - if embedding_dimension is None: - raise ValueError(f"Embedding model '{model_id}' has no embedding_dimension in metadata") - logger.info(f"Using default embedding model: {model_id} with dimension {embedding_dimension}") - return model_id, embedding_dimension - - logger.debug("No default embedding models found") - return None - async def openai_list_vector_stores( self, limit: int | None = 20, diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 3137de0de..a258eb1a0 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -317,3 +317,72 @@ def pytest_ignore_collect(path: str, config: pytest.Config) -> bool: if p.is_relative_to(rp): return False return True + + +def get_vector_io_provider_ids(client): + """Get all available vector_io provider IDs.""" + providers = [p for p in client.providers.list() if p.api == "vector_io"] + return [p.provider_id for p in providers] + + +def vector_provider_wrapper(func): + """Decorator to run a test against all available vector_io providers.""" + import functools + import os + + @functools.wraps(func) + def wrapper(*args, **kwargs): + # Get the vector_io_provider_id from the test arguments + import inspect + + sig = inspect.signature(func) + bound_args = sig.bind(*args, **kwargs) + bound_args.apply_defaults() + + vector_io_provider_id = bound_args.arguments.get("vector_io_provider_id") + if not vector_io_provider_id: + pytest.skip("No vector_io_provider_id provided") + + # Get client_with_models to check available providers + client_with_models = bound_args.arguments.get("client_with_models") + if client_with_models: + available_providers = get_vector_io_provider_ids(client_with_models) + if vector_io_provider_id not in available_providers: + pytest.skip(f"Provider '{vector_io_provider_id}' not available. Available: {available_providers}") + + return func(*args, **kwargs) + + # For replay tests, only use providers that are available in ci-tests environment + if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE") == "replay": + all_providers = ["faiss", "sqlite-vec"] + else: + # For live tests, try all providers (they'll skip if not available) + all_providers = [ + "faiss", + "sqlite-vec", + "milvus", + "chromadb", + "pgvector", + "weaviate", + "qdrant", + ] + + return pytest.mark.parametrize("vector_io_provider_id", all_providers)(wrapper) + + +@pytest.fixture +def vector_io_provider_id(request, client_with_models): + """Fixture that provides a specific vector_io provider ID, skipping if not available.""" + if hasattr(request, "param"): + requested_provider = request.param + available_providers = get_vector_io_provider_ids(client_with_models) + + if requested_provider not in available_providers: + pytest.skip(f"Provider '{requested_provider}' not available. 
Available: {available_providers}")
+
+        return requested_provider
+    else:
+        provider_ids = get_vector_io_provider_ids(client_with_models)
+        if not provider_ids:
+            pytest.skip("No vector_io providers available")
+        return provider_ids[0]
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index eb6840e60..ffd49033d 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -21,6 +21,7 @@ from llama_stack_client import LlamaStackClient
 from openai import OpenAI

 from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.stack import run_config_from_adhoc_config_spec
 from llama_stack.env import get_env_or_fail

@@ -236,6 +237,13 @@ def instantiate_llama_stack_client(session):

     if "=" in config:
         run_config = run_config_from_adhoc_config_spec(config)
+
+        # --stack-config bypasses the template, so we need to set the default embedding model here
+        if "vector_io" in config and "inference" in config:
+            run_config.vector_stores = VectorStoresConfig(
+                embedding_model_id="inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
+            )
+
         run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
         with open(run_config_file.name, "w") as f:
             yaml.dump(run_config.model_dump(mode="json"), f)
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index e21b233bc..626faf42d 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -8,14 +8,15 @@ import time
 from io import BytesIO

 import pytest
-from llama_stack_client import BadRequestError, NotFoundError
+from llama_stack_client import BadRequestError
 from openai import BadRequestError as OpenAIBadRequestError
-from openai import NotFoundError as OpenAINotFoundError

 from llama_stack.apis.vector_io import Chunk
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from llama_stack.log import get_logger

+from ..conftest import vector_provider_wrapper
+
 logger = get_logger(name=__name__, category="vector_io")

@@ -133,8 +134,9 @@ def compat_client_with_empty_stores(compat_client):
         clear_files()


+@vector_provider_wrapper
 def test_openai_create_vector_store(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test creating a vector store using OpenAI API."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -146,6 +148,7 @@ def test_openai_create_vector_store(
         metadata={"purpose": "testing", "environment": "integration"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )

@@ -159,14 +162,18 @@ def test_openai_create_vector_store(
     assert hasattr(vector_store, "created_at")


-def test_openai_create_vector_store_default(compat_client_with_empty_stores, client_with_models):
+@vector_provider_wrapper
+def test_openai_create_vector_store_default(compat_client_with_empty_stores, client_with_models, vector_io_provider_id):
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    vector_store = compat_client_with_empty_stores.vector_stores.create()
+    vector_store = compat_client_with_empty_stores.vector_stores.create(
+        extra_body={"provider_id": vector_io_provider_id}
+    )
     assert vector_store.id
+@vector_provider_wrapper def test_openai_list_vector_stores( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test listing vector stores using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -179,6 +186,7 @@ def test_openai_list_vector_stores( metadata={"type": "test"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) store2 = client.vector_stores.create( @@ -186,6 +194,7 @@ def test_openai_list_vector_stores( metadata={"type": "test"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -206,8 +215,9 @@ def test_openai_list_vector_stores( assert len(limited_response.data) == 1 +@vector_provider_wrapper def test_openai_retrieve_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test retrieving a specific vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -220,6 +230,7 @@ def test_openai_retrieve_vector_store( metadata={"purpose": "retrieval_test"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -233,8 +244,9 @@ def test_openai_retrieve_vector_store( assert retrieved_store.object == "vector_store" +@vector_provider_wrapper def test_openai_update_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test modifying a vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -247,6 +259,7 @@ def test_openai_update_vector_store( metadata={"version": "1.0"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) time.sleep(1) @@ -264,8 +277,9 @@ def test_openai_update_vector_store( assert modified_store.last_active_at > created_store.last_active_at +@vector_provider_wrapper def test_openai_delete_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test deleting a vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -278,6 +292,7 @@ def test_openai_delete_vector_store( metadata={"purpose": "deletion_test"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -294,8 +309,9 @@ def test_openai_delete_vector_store( client.vector_stores.retrieve(vector_store_id=created_store.id) +@vector_provider_wrapper def test_openai_vector_store_search_empty( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test searching an empty vector store using OpenAI API.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -308,6 +324,7 @@ def test_openai_vector_store_search_empty( 
metadata={"purpose": "search_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -323,8 +340,14 @@ def test_openai_vector_store_search_empty( assert search_response.has_more is False +@vector_provider_wrapper def test_openai_vector_store_with_chunks( - compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): """Test vector store functionality with actual chunks using both OpenAI and native APIs.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -338,6 +361,7 @@ def test_openai_vector_store_with_chunks( metadata={"purpose": "chunks_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -380,6 +404,7 @@ def test_openai_vector_store_with_chunks( ("What inspires neural networks?", "doc4", "ai"), ], ) +@vector_provider_wrapper def test_openai_vector_store_search_relevance( compat_client_with_empty_stores, client_with_models, @@ -387,6 +412,7 @@ def test_openai_vector_store_search_relevance( test_case, embedding_model_id, embedding_dimension, + vector_io_provider_id, ): """Test that OpenAI vector store search returns relevant results for different queries.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -402,6 +428,7 @@ def test_openai_vector_store_search_relevance( metadata={"purpose": "relevance_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -430,8 +457,14 @@ def test_openai_vector_store_search_relevance( assert top_result.score > 0 +@vector_provider_wrapper def test_openai_vector_store_search_with_ranking_options( - compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): """Test OpenAI vector store search with ranking options.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -445,6 +478,7 @@ def test_openai_vector_store_search_with_ranking_options( metadata={"purpose": "ranking_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -483,8 +517,14 @@ def test_openai_vector_store_search_with_ranking_options( assert result.score >= threshold +@vector_provider_wrapper def test_openai_vector_store_search_with_high_score_filter( - compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): """Test that searching with text very similar to a document and high score threshold returns only that document.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -498,6 +538,7 @@ def test_openai_vector_store_search_with_high_score_filter( metadata={"purpose": "high_score_filtering"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -542,8 +583,14 @@ def test_openai_vector_store_search_with_high_score_filter( assert "python" in top_content.lower() or "programming" in top_content.lower() +@vector_provider_wrapper def 
test_openai_vector_store_search_with_max_num_results( - compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): """Test OpenAI vector store search with max_num_results.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -557,6 +604,7 @@ def test_openai_vector_store_search_with_max_num_results( metadata={"purpose": "max_num_results_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -577,8 +625,9 @@ def test_openai_vector_store_search_with_max_num_results( assert len(search_response.data) == 2 +@vector_provider_wrapper def test_openai_vector_store_attach_file( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store attach file.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -591,6 +640,7 @@ def test_openai_vector_store_attach_file( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -637,8 +687,9 @@ def test_openai_vector_store_attach_file( assert "foobazbar" in top_content.lower() +@vector_provider_wrapper def test_openai_vector_store_attach_files_on_creation( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store attach files on creation.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -668,6 +719,7 @@ def test_openai_vector_store_attach_files_on_creation( file_ids=file_ids, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -700,8 +752,9 @@ def test_openai_vector_store_attach_files_on_creation( assert updated_vector_store.file_counts.failed == 0 +@vector_provider_wrapper def test_openai_vector_store_list_files( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store list files.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -714,6 +767,7 @@ def test_openai_vector_store_list_files( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -773,8 +827,9 @@ def test_openai_vector_store_list_files( assert updated_vector_store.file_counts.in_progress == 0 +@vector_provider_wrapper def test_openai_vector_store_list_files_invalid_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store list files with invalid vector store ID.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -783,14 +838,15 @@ def test_openai_vector_store_list_files_invalid_vector_store( if isinstance(compat_client, LlamaStackAsLibraryClient): errors = ValueError else: - errors = (NotFoundError, 
OpenAINotFoundError) + errors = (BadRequestError, OpenAIBadRequestError) with pytest.raises(errors): compat_client.vector_stores.files.list(vector_store_id="abc123") +@vector_provider_wrapper def test_openai_vector_store_retrieve_file_contents( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store retrieve file contents.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -803,6 +859,7 @@ def test_openai_vector_store_retrieve_file_contents( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -848,8 +905,9 @@ def test_openai_vector_store_retrieve_file_contents( assert file_contents.attributes == attributes +@vector_provider_wrapper def test_openai_vector_store_delete_file( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store delete file.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -862,6 +920,7 @@ def test_openai_vector_store_delete_file( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -912,8 +971,9 @@ def test_openai_vector_store_delete_file( assert updated_vector_store.file_counts.in_progress == 0 +@vector_provider_wrapper def test_openai_vector_store_delete_file_removes_from_vector_store( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store delete file removes from vector store.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -926,6 +986,7 @@ def test_openai_vector_store_delete_file_removes_from_vector_store( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -962,8 +1023,9 @@ def test_openai_vector_store_delete_file_removes_from_vector_store( assert not search_response.data +@vector_provider_wrapper def test_openai_vector_store_update_file( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test OpenAI vector store update file.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -976,6 +1038,7 @@ def test_openai_vector_store_update_file( name="test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1017,8 +1080,9 @@ def test_openai_vector_store_update_file( assert retrieved_file.attributes["foo"] == "baz" +@vector_provider_wrapper def test_create_vector_store_files_duplicate_vector_store_name( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """ This test confirms that client.vector_stores.create() creates a unique ID @@ -1044,6 +1108,7 @@ def test_create_vector_store_files_duplicate_vector_store_name( 
name="test_store_with_files", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) assert vector_store.file_counts.completed == 0 @@ -1056,6 +1121,7 @@ def test_create_vector_store_files_duplicate_vector_store_name( name="test_store_with_files", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1086,8 +1152,15 @@ def test_create_vector_store_files_duplicate_vector_store_name( @pytest.mark.parametrize("search_mode", ["vector", "keyword", "hybrid"]) +@vector_provider_wrapper def test_openai_vector_store_search_modes( - llama_stack_client, client_with_models, sample_chunks, search_mode, embedding_model_id, embedding_dimension + llama_stack_client, + client_with_models, + sample_chunks, + search_mode, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, ): skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_models, search_mode) @@ -1097,6 +1170,7 @@ def test_openai_vector_store_search_modes( metadata={"purpose": "search_mode_testing"}, extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1115,8 +1189,9 @@ def test_openai_vector_store_search_modes( assert search_response is not None +@vector_provider_wrapper def test_openai_vector_store_file_batch_create_and_retrieve( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test creating and retrieving a vector store file batch.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1128,6 +1203,7 @@ def test_openai_vector_store_file_batch_create_and_retrieve( name="batch_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1178,8 +1254,9 @@ def test_openai_vector_store_file_batch_create_and_retrieve( assert retrieved_batch.status == "completed" # Should be completed after processing +@vector_provider_wrapper def test_openai_vector_store_file_batch_list_files( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test listing files in a vector store file batch.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1191,6 +1268,7 @@ def test_openai_vector_store_file_batch_list_files( name="batch_list_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1271,8 +1349,9 @@ def test_openai_vector_store_file_batch_list_files( assert first_page_ids.isdisjoint(second_page_ids) +@vector_provider_wrapper def test_openai_vector_store_file_batch_cancel( - compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id ): """Test cancelling a vector store file batch.""" skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) @@ -1284,6 +1363,7 @@ def test_openai_vector_store_file_batch_cancel( name="batch_cancel_test_store", extra_body={ "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, }, ) @@ -1326,8 +1406,9 @@ def 
     assert final_batch.status in ["completed", "cancelled"]
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_file_batch_retrieve_contents(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test retrieving file contents after file batch processing."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -1339,6 +1420,7 @@ def test_openai_vector_store_file_batch_retrieve_contents(
         name="batch_contents_test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -1399,8 +1481,9 @@ def test_openai_vector_store_file_batch_retrieve_contents(
         assert file_data[i][1].decode("utf-8") in content_text
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_file_batch_error_handling(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test error handling for file batch operations."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -1412,6 +1495,7 @@ def test_openai_vector_store_file_batch_error_handling(
         name="batch_error_test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -1443,11 +1527,11 @@ def test_openai_vector_store_file_batch_error_handling(
             batch_id="non_existent_batch_id",
         )
 
-    # Test operations on non-existent vector store (returns NotFoundError)
+    # Test operations on non-existent vector store (returns BadRequestError)
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         vector_store_errors = ValueError
     else:
-        vector_store_errors = (NotFoundError, OpenAINotFoundError)
+        vector_store_errors = (BadRequestError, OpenAIBadRequestError)
     with pytest.raises(vector_store_errors):
         # Should raise an error for non-existent vector store
         compat_client.vector_stores.file_batches.create(
@@ -1456,8 +1540,9 @@ def test_openai_vector_store_file_batch_error_handling(
         )
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_embedding_config_from_metadata(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test that embedding configuration works from metadata source."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -1471,6 +1556,9 @@ def test_openai_vector_store_embedding_config_from_metadata(
             "embedding_dimension": str(embedding_dimension),
             "test_source": "metadata",
         },
+        extra_body={
+            "provider_id": vector_io_provider_id,
+        },
     )
 
     assert vector_store_metadata is not None
@@ -1489,6 +1577,7 @@ def test_openai_vector_store_embedding_config_from_metadata(
         extra_body={
             "embedding_model": embedding_model_id,
             "embedding_dimension": int(embedding_dimension),  # Ensure same type/value
+            "provider_id": vector_io_provider_id,
         },
     )
 
diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py
index 653299338..e5ca7a0db 100644
--- a/tests/integration/vector_io/test_vector_io.py
+++ b/tests/integration/vector_io/test_vector_io.py
@@ -8,6 +8,8 @@ import pytest
 
 from llama_stack.apis.vector_io import Chunk
 
+from ..conftest import vector_provider_wrapper
+
 
 @pytest.fixture(scope="session")
 def sample_chunks():
@@ -46,12 +48,13 @@ def client_with_empty_registry(client_with_models):
     clear_registry()
 
 
-def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension):
+@vector_provider_wrapper
+def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id):
     vector_db_name = "test_vector_db"
     create_response = client_with_empty_registry.vector_stores.create(
         name=vector_db_name,
         extra_body={
-            "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -65,12 +68,13 @@ def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embe
     assert response.id.startswith("vs_")
 
 
-def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension):
+@vector_provider_wrapper
+def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id):
     vector_db_name = "test_vector_db"
     response = client_with_empty_registry.vector_stores.create(
         name=vector_db_name,
         extra_body={
-            "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -100,12 +104,15 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embe
         ("How does machine learning improve over time?", "doc2"),
     ],
 )
-def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case):
+@vector_provider_wrapper
+def test_insert_chunks(
+    client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case, vector_io_provider_id
+):
     vector_db_name = "test_vector_db"
     create_response = client_with_empty_registry.vector_stores.create(
         name=vector_db_name,
         extra_body={
-            "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -135,7 +142,10 @@ def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding
     assert top_match.metadata["document_id"] == expected_doc_id, f"Query '{query}' should match {expected_doc_id}"
 
 
-def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, embedding_model_id, embedding_dimension):
+@vector_provider_wrapper
+def test_insert_chunks_with_precomputed_embeddings(
+    client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
     vector_io_provider_params_dict = {
         "inline::milvus": {"score_threshold": -1.0},
         "inline::qdrant": {"score_threshold": -1.0},
@@ -145,7 +155,7 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
     register_response = client_with_empty_registry.vector_stores.create(
         name=vector_db_name,
         extra_body={
-            "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -181,8 +191,9 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
 
 
 # expect this test to fail
+@vector_provider_wrapper
 def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
-    client_with_empty_registry, embedding_model_id, embedding_dimension
+    client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     vector_io_provider_params_dict = {
         "inline::milvus": {"score_threshold": 0.0},
@@ -194,6 +205,7 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
         name=vector_db_name,
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -226,33 +238,44 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
     assert response.chunks[0].metadata["source"] == "precomputed"
 
 
-def test_auto_extract_embedding_dimension(client_with_empty_registry, embedding_model_id):
+@vector_provider_wrapper
+def test_auto_extract_embedding_dimension(
+    client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
+    # This test exercises the explicit embedding_model override, so it stays in extra_body
     vs = client_with_empty_registry.vector_stores.create(
-        name="test_auto_extract", extra_body={"embedding_model": embedding_model_id}
+        name="test_auto_extract",
+        extra_body={"embedding_model": embedding_model_id, "provider_id": vector_io_provider_id},
     )
     assert vs.id is not None
 
 
-def test_provider_auto_selection_single_provider(client_with_empty_registry, embedding_model_id):
+@vector_provider_wrapper
+def test_provider_auto_selection_single_provider(
+    client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
     providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
     if len(providers) != 1:
         pytest.skip(f"Test requires exactly one vector_io provider, found {len(providers)}")
 
-    vs = client_with_empty_registry.vector_stores.create(
-        name="test_auto_provider", extra_body={"embedding_model": embedding_model_id}
-    )
+    # Test that when only one provider is available, it is auto-selected (no provider_id needed)
+    vs = client_with_empty_registry.vector_stores.create(name="test_auto_provider")
     assert vs.id is not None
 
 
-def test_provider_id_override(client_with_empty_registry, embedding_model_id):
+@vector_provider_wrapper
+def test_provider_id_override(
+    client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
     providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
     if len(providers) != 1:
         pytest.skip(f"Test requires exactly one vector_io provider, found {len(providers)}")
 
     provider_id = providers[0].provider_id
+    # Test explicit provider_id specification (using the default embedding model)
     vs = client_with_empty_registry.vector_stores.create(
-        name="test_provider_override", extra_body={"embedding_model": embedding_model_id, "provider_id": provider_id}
+        name="test_provider_override", extra_body={"provider_id": provider_id}
     )
     assert vs.id is not None
     assert vs.metadata.get("provider_id") == provider_id
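Note: `vector_provider_wrapper` is imported from the integration-test conftest, whose definition is not part of this patch. As a rough sketch (an assumption, not the actual conftest code), the tests above behave as if the decorator did something like this, with the hard-coded provider IDs as placeholders:

import pytest

def vector_provider_wrapper(test_func):
    # Sketch: run the wrapped test once per vector_io provider, passing each
    # provider's ID to the test through the `vector_io_provider_id` argument.
    # The real conftest presumably discovers providers from the running stack;
    # this literal list is only for illustration.
    provider_ids = ["inline::faiss", "inline::sqlite-vec", "inline::milvus"]
    return pytest.mark.parametrize("vector_io_provider_id", provider_ids)(test_func)

Each decorated test then receives `vector_io_provider_id` as an ordinary pytest parameter, which is why every signature in this patch gains that argument.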
-""" +"""Unit tests for Stack validation functions.""" from unittest.mock import AsyncMock import pytest -from llama_stack.apis.models import Model, ModelType -from llama_stack.core.stack import validate_default_embedding_model +from llama_stack.apis.models import ListModelsResponse, Model, ModelType +from llama_stack.core.datatypes import QualifiedModel, StackRunConfig, StorageConfig, VectorStoresConfig +from llama_stack.core.stack import validate_vector_stores_config from llama_stack.providers.datatypes import Api -class TestStackValidation: - """Test Stack validation functions.""" +class TestVectorStoresValidation: + async def test_validate_missing_model(self): + """Test validation fails when model not found.""" + run_config = StackRunConfig( + image_name="test", + providers={}, + storage=StorageConfig(backends={}, stores={}), + vector_stores=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=QualifiedModel( + provider_id="p", + model_id="missing", + ), + ), + ) + mock_models = AsyncMock() + mock_models.list_models.return_value = ListModelsResponse(data=[]) - @pytest.mark.parametrize( - "models,should_raise", - [ - ([], False), # No models - ( - [ - Model( - identifier="emb1", - model_type=ModelType.embedding, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="emb1", - ) - ], - False, - ), # Single default - ( - [ - Model( - identifier="emb1", - model_type=ModelType.embedding, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="emb1", - ), - Model( - identifier="emb2", - model_type=ModelType.embedding, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="emb2", - ), - ], - True, - ), # Multiple defaults - ( - [ - Model( - identifier="emb1", - model_type=ModelType.embedding, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="emb1", - ), - Model( - identifier="llm1", - model_type=ModelType.llm, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="llm1", - ), - ], - False, - ), # Ignores non-embedding - ], - ) - async def test_validate_default_embedding_model(self, models, should_raise): - """Test validation with various model configurations.""" - mock_models_impl = AsyncMock() - mock_models_impl.list_models.return_value = models - impls = {Api.models: mock_models_impl} + with pytest.raises(ValueError, match="not found"): + await validate_vector_stores_config(run_config.vector_stores, {Api.models: mock_models}) - if should_raise: - with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"): - await validate_default_embedding_model(impls) - else: - await validate_default_embedding_model(impls) + async def test_validate_success(self): + """Test validation passes with valid model.""" + run_config = StackRunConfig( + image_name="test", + providers={}, + storage=StorageConfig(backends={}, stores={}), + vector_stores=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=QualifiedModel( + provider_id="p", + model_id="valid", + ), + ), + ) + mock_models = AsyncMock() + mock_models.list_models.return_value = ListModelsResponse( + data=[ + Model( + identifier="p/valid", # Must match provider_id/model_id format + model_type=ModelType.embedding, + metadata={"embedding_dimension": 768}, + provider_id="p", + provider_resource_id="valid", + ) + ] + ) - async def test_validate_default_embedding_model_no_models_api(self): - """Test validation when models API is 
diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py
index 6d0367beb..c78596018 100644
--- a/tests/unit/providers/vector_io/conftest.py
+++ b/tests/unit/providers/vector_io/conftest.py
@@ -146,7 +146,6 @@ async def sqlite_vec_adapter(sqlite_vec_db_path, unique_kvstore_config, mock_inf
         config=config,
         inference_api=mock_inference_api,
         files_api=None,
-        models_api=None,
     )
     collection_id = f"sqlite_test_collection_{np.random.randint(1e6)}"
     await adapter.initialize()
@@ -185,7 +184,6 @@ async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding
         config=config,
         inference_api=mock_inference_api,
         files_api=None,
-        models_api=None,
     )
     await adapter.initialize()
     await adapter.register_vector_db(
diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py
index 76969b711..fa5c5f56b 100644
--- a/tests/unit/providers/vector_io/test_faiss.py
+++ b/tests/unit/providers/vector_io/test_faiss.py
@@ -11,7 +11,6 @@ import numpy as np
 import pytest
 
 from llama_stack.apis.files import Files
-from llama_stack.apis.models import Models
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
 from llama_stack.providers.datatypes import HealthStatus
@@ -76,12 +75,6 @@ def mock_files_api():
     return mock_api
 
 
-@pytest.fixture
-def mock_models_api():
-    mock_api = MagicMock(spec=Models)
-    return mock_api
-
-
 @pytest.fixture
 def faiss_config():
     config = MagicMock(spec=FaissVectorIOConfig)
@@ -117,7 +110,7 @@ async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_
     assert response.chunks[1] == sample_chunks[1]
 
 
-async def test_health_success(mock_models_api):
+async def test_health_success():
     """Test that the health check returns OK status when faiss is working correctly."""
     # Create a fresh instance of FaissVectorIOAdapter for testing
     config = MagicMock()
@@ -126,9 +119,7 @@ async def test_health_success(mock_models_api):
     with patch("llama_stack.providers.inline.vector_io.faiss.faiss.faiss.IndexFlatL2") as mock_index_flat:
         mock_index_flat.return_value = MagicMock()
 
-        adapter = FaissVectorIOAdapter(
-            config=config, inference_api=inference_api, models_api=mock_models_api, files_api=files_api
-        )
+        adapter = FaissVectorIOAdapter(config=config, inference_api=inference_api, files_api=files_api)
 
         # Calling the health method directly
         response = await adapter.health()
@@ -142,7 +133,7 @@ async def test_health_success(mock_models_api):
     mock_index_flat.assert_called_once_with(128)  # VECTOR_DIMENSION is 128
 
 
-async def test_health_failure(mock_models_api):
+async def test_health_failure():
     """Test that the health check returns ERROR status when faiss encounters an error."""
     # Create a fresh instance of FaissVectorIOAdapter for testing
     config = MagicMock()
@@ -152,9 +143,7 @@ async def test_health_failure(mock_models_api):
     with patch("llama_stack.providers.inline.vector_io.faiss.faiss.faiss.IndexFlatL2") as mock_index_flat:
         mock_index_flat.side_effect = Exception("Test error")
 
-        adapter = FaissVectorIOAdapter(
-            config=config, inference_api=inference_api, models_api=mock_models_api, files_api=files_api
-        )
+        adapter = FaissVectorIOAdapter(config=config, inference_api=inference_api, files_api=files_api)
 
         # Calling the health method directly
         response = await adapter.health()
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index 32d59c91b..ad55b9336 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -6,13 +6,12 @@
 
 import json
 import time
-from unittest.mock import AsyncMock, Mock, patch
+from unittest.mock import AsyncMock, patch
 
 import numpy as np
 import pytest
 
 from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -996,96 +995,6 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
     assert batch.file_counts.in_progress == 8
 
 
-async def test_get_default_embedding_model_success(vector_io_adapter):
-    """Test successful default embedding model detection."""
-    # Mock models API with a default model
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="nomic-embed-text-v1.5",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={
-                        "embedding_dimension": 768,
-                        "default_configured": True,
-                    },
-                )
-            ]
-        )
-    )
-
-    vector_io_adapter.models_api = mock_models_api
-    result = await vector_io_adapter._get_default_embedding_model_and_dimension()
-
-    assert result is not None
-    model_id, dimension = result
-    assert model_id == "nomic-embed-text-v1.5"
-    assert dimension == 768
-
-
-async def test_get_default_embedding_model_multiple_defaults_error(vector_io_adapter):
-    """Test error when multiple models are marked as default."""
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="model1",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 768, "default_configured": True},
-                ),
-                Model(
-                    identifier="model2",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 512, "default_configured": True},
-                ),
-            ]
-        )
-    )
-
-    vector_io_adapter.models_api = mock_models_api
-
-    with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"):
-        await vector_io_adapter._get_default_embedding_model_and_dimension()
-
-
-async def test_openai_create_vector_store_uses_default_model(vector_io_adapter):
-    """Test that vector store creation uses default embedding model when none specified."""
-    # Mock models API and dependencies
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="default-model",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 512, "default_configured": True},
-                )
-            ]
-        )
-    )
-
-    vector_io_adapter.models_api = mock_models_api
-    vector_io_adapter.register_vector_db = AsyncMock()
-    vector_io_adapter.__provider_id__ = "test-provider"
-
-    # Create vector store without specifying embedding model
-    params = OpenAICreateVectorStoreRequestWithExtraBody(name="test-store")
-    result = await vector_io_adapter.openai_create_vector_store(params)
-
-    # Verify the vector store was created with default model
-    assert result.name == "test-store"
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
-    assert call_args.embedding_model == "default-model"
"default-model" - assert call_args.embedding_dimension == 512 - - async def test_embedding_config_from_metadata(vector_io_adapter): """Test that embedding configuration is correctly extracted from metadata.""" @@ -1253,5 +1162,5 @@ async def test_embedding_config_required_model_missing(vector_io_adapter): # Test with no embedding model provided params = OpenAICreateVectorStoreRequestWithExtraBody(name="test_store", metadata={}) - with pytest.raises(ValueError, match="embedding_model is required in extra_body when creating a vector store"): + with pytest.raises(ValueError, match="embedding_model is required"): await vector_io_adapter.openai_create_vector_store(params)