Compare commits

..

28 commits

Author SHA1 Message Date
Francisco Arceo
53c20f6113
feat: Adding Demo script (#3870)
# What does this PR do?
Updated the quickstart `demo_script.py` to use the OpenAI APIs; the new script is simply:

```python
import io

import requests
from openai import OpenAI

url = "https://www.paulgraham.com/greatwork.html"
client = OpenAI(base_url="http://localhost:8321/v1/", api_key="none")

# Create a vector store and upload the fetched page into it.
vs = client.vector_stores.create()
response = requests.get(url)
pseudo_file = io.BytesIO(response.content)
uploaded_file = client.files.create(file=(url, pseudo_file, "text/html"), purpose="assistants")
client.vector_stores.files.create(vector_store_id=vs.id, file_id=uploaded_file.id)

# Ask a question that should exercise the file_search (knowledge_search) tool.
resp = client.responses.create(
    model="openai/gpt-4o",
    input="How do you do great work? Use the existing knowledge_search tool.",
    tools=[{"type": "file_search", "vector_store_ids": [vs.id]}],
    include=["file_search_call.results"],
)

print(resp)
```




## Test Plan

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
2025-10-21 21:31:21 -04:00
github-actions[bot]
bf2d16997d build: Bump version to 0.3.0
2025-10-21 23:59:09 +00:00
Ashwin Bharambe
c0c0e337d9 misc(tests): add recordings for responses tests 2025-10-21 16:39:08 -07:00
Ashwin Bharambe
557b1b8c2d fix(logs): restore uvicorn and llama_stack logger settings 2025-10-21 15:47:55 -07:00
slekkala1
eb2b240594
fix: remove consistency checks (#3881)
# What does this PR do?
The `metadata` embedding model conflicts with the default embedding model set on the server side via `extra_body`. This removes the check and just lets `metadata` take precedence over `extra_body`, instead of raising:

```
ValueError: Embedding model inconsistent between metadata ('text-embedding-3-small') and extra_body ('sentence-transformers/nomic-ai/nomic-embed-text-v1.5')
```
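
The precedence rule is simple; a minimal sketch (hypothetical helper and key names, not the PR's actual code):

```python
# Hypothetical sketch: metadata wins over extra_body when both specify a model.
def resolve_embedding_model(metadata: dict, extra_body: dict) -> str | None:
    return metadata.get("embedding_model") or extra_body.get("embedding_model")
```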
## Test Plan
CI
2025-10-21 14:40:14 -07:00
Alexey Rybak
4c718523fa
docs: fix the building distro file (#3880)
# What does this PR do?
* Fixes the doc server build (which expects a blank line after imports)

## Test Plan
* `cd docs && npm run build`
2025-10-21 14:26:35 -07:00
slekkala1
cb6a5e2687
fix: fix segfault in load model (#3879)
# What does this PR do?
Fix a segfault when loading a model. The cc-vec integration crashed with a segfault on macOS when used with the default embedding model (`model_id: nomic-ai/nomic-embed-text-v1.5`, `provider_id: sentence-transformers`). The crash report points to torch's OpenMP settings; constraining torch to a single thread avoids the crash.
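
A minimal sketch of that kind of workaround (hypothetical placement; the PR's actual change may differ):

```python
import os

# OMP_NUM_THREADS must be set before the OpenMP runtime initializes,
# i.e. before torch is imported (hypothetical env-var approach).
os.environ.setdefault("OMP_NUM_THREADS", "1")

import torch

# Equivalent runtime knob: limit torch's intra-op parallelism to one thread.
torch.set_num_threads(1)
```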


## Test Plan
Tested with the cc-vec integration:
1. Start the server: `llama stack run starter`
2. Do the setup in https://github.com/raghotham/cc-vec to set env variables, then try
   `uv run cc-vec index --url-patterns "%.github.io" --vector-store-name "ml-research" --limit 50 --chunk-size 800 --overlap 400`
2025-10-21 12:21:06 -07:00
ehhuang
1ec7216c3f
chore: update quick_start (#3878)
# What does this PR do?


## Test Plan
2025-10-21 11:33:23 -07:00
Ashwin Bharambe
bd3c473208
revert: "chore(cleanup)!: remove tool_runtime.rag_tool" (#3877)
Reverts llamastack/llama-stack#3871

This PR broke RAG (even from Responses -- there _is_ a dependency)
2025-10-21 11:22:06 -07:00
ehhuang
eb3e9b85f9
chore: update getting_started (#3875)
# What does this PR do?


## Test Plan
2025-10-21 11:09:45 -07:00
Ashwin Bharambe
71ead88bce
fix(logging): move module-level initialization to explicit setup calls (#3874)
- Moved environment variable parsing and `setup_logging()` call from
module level to proper initialization points
- Added explicit `setup_logging()` calls in `server.py::create_app()`
and `library_client.py::AsyncLlamaStackAsLibraryClient.__init__()`

Module-level side effects are bad practice and can cause issues with
import order, testing, and circular dependencies. The previous
implementation ran logging setup on every import of the log module,
which is unpredictable and difficult to control.
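
A minimal sketch of the before/after pattern (hypothetical module layout; names are illustrative, not the PR's actual code):

```python
# log.py -- no side effects at import time
import logging
import os


def setup_logging() -> None:
    """Parse env vars and configure logging only when explicitly called."""
    level = os.environ.get("LOG_LEVEL", "INFO")  # hypothetical env var
    logging.basicConfig(level=level)


# server.py -- invoke at an explicit initialization point
def create_app():
    setup_logging()  # previously ran implicitly on `import log`
    ...
```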

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-10-21 11:08:25 -07:00
Ashwin Bharambe
9191005ca1
fix(ci): dump server/container logs when tests fail (#3873)
Output the last 100 lines of server.log or the docker container logs when integration tests fail, to aid debugging.
2025-10-20 22:28:55 -07:00
Ashwin Bharambe
0e96279bee
chore(cleanup)!: remove tool_runtime.rag_tool (#3871)
Kill the `builtin::rag` tool group completely since it is no longer targeted. We use the Responses implementation for knowledge_search, which uses the `openai_vector_stores` pathway.

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-10-20 22:26:21 -07:00
Ashwin Bharambe
5aaf1a8bca
fix(ci): improve workflow logging and bot notifications (#3872)
## Summary
- Link pre-commit bot comment to workflow run instead of PR for better
debugging
- Dump docker container logs before removal to ensure logs are actually
captured

## Changes
1. **Pre-commit bot**: Changed the initial bot comment to link
"pre-commit hooks" text to the actual workflow run URL instead of just
having the PR number auto-link
2. **Docker logs**: Moved docker container log dumping from GitHub
Actions to the integration-tests.sh script's stop_container() function,
ensuring logs are captured before container removal

## Test plan
- Pre-commit bot comment will now have a clickable link to the workflow
run
- Docker container logs will be successfully captured in CI runs
2025-10-20 22:08:15 -07:00
Ashwin Bharambe
122de785c4
chore(cleanup)!: kill vector_db references as far as possible (#3864)
There should not be "vector db" anywhere.
2025-10-20 20:06:16 -07:00
ehhuang
444f6c88f3
chore: remove build.py (#3869)
Some checks failed
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 3s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 6s
Vector IO Integration Tests / test-matrix (push) Failing after 4s
Python Package Build Test / build (3.13) (push) Failing after 1s
Test Llama Stack Build / generate-matrix (push) Successful in 5s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Test Llama Stack Build / build-single-provider (push) Failing after 3s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 3s
Test llama stack list-deps / generate-matrix (push) Successful in 4s
Test llama stack list-deps / show-single-provider (push) Failing after 3s
Test llama stack list-deps / list-deps-from-config (push) Failing after 3s
API Conformance Tests / check-schema-compatibility (push) Successful in 11s
Test External API and Providers / test-external (venv) (push) Failing after 4s
Unit Tests / unit-tests (3.12) (push) Failing after 4s
Test Llama Stack Build / build (push) Failing after 3s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
Python Package Build Test / build (3.12) (push) Failing after 20s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 23s
Test llama stack list-deps / list-deps (push) Failing after 18s
UI Tests / ui-tests (22) (push) Successful in 57s
Pre-commit / pre-commit (push) Successful in 1m52s
# What does this PR do?


## Test Plan
CI
2025-10-20 16:28:15 -07:00
Charlie Doern
6a13a99e77
chore: add beta group to stainless (#3866)
# What does this PR do?

Similarly to `alpha:`, move the `v1beta` routes under a `beta` group so the client will have `client.beta`.

From what I can tell, the openapi.stainless.yml file is hand-written while the openapi.yml file is generated and copied via the shell script, so I made this change by hand.

Signed-off-by: Charlie Doern <cdoern@redhat.com>
2025-10-20 16:26:06 -07:00
ehhuang
407bade359
chore: migrate stack build (#3867)
# What does this PR do?
Just use an editable install here. Not sure about the USE_COPY_NOT_MOUNT flag that was used in the original scripts and whether it's still needed.

## Test Plan
![test output screenshot](https://github.com/user-attachments/assets/7ddf8e31-2635-45d3-b79c-1b898eefbf07)

2025-10-20 16:22:48 -07:00
ehhuang
ffeb86385c
chore: fix main (#3868)
# What does this PR do?
A duplicate entry was added for some reason.

## Test Plan
2025-10-20 16:01:03 -07:00
ehhuang
b215eb5944
chore: skip shutdown if otel_endpoint is not set (#3865)
# What does this PR do?
Gets rid of the following error when the server is Ctrl+C'd:

```
/Users/erichuang/projects/lst3/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py:92 in shutdown

  89 │         pass
  90 │
  91 │     async def shutdown(self) -> None:
  92 │ ❱       trace.get_tracer_provider().force_flush()
  93 │
  94 │     async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
  95 │         if isinstance(event, UnstructuredLogEvent):

AttributeError: 'ProxyTracerProvider' object has no attribute 'force_flush'
```
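
A minimal sketch of guarding that call (assuming the OpenTelemetry Python SDK; the PR's actual fix skips shutdown when `otel_endpoint` is not set):

```python
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider


class TelemetryAdapter:  # stand-in for the real provider class
    async def shutdown(self) -> None:
        provider = trace.get_tracer_provider()
        # The default ProxyTracerProvider (used when no OTEL endpoint is
        # configured) has no force_flush, so only flush a real SDK provider.
        if isinstance(provider, TracerProvider):
            provider.force_flush()
```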

## Test Plan
2025-10-20 15:48:37 -07:00
dependabot[bot]
d9274d199e
chore(ui-deps): bump @types/node from 24.3.0 to 24.8.1 in /llama_stack/ui (#3851)
Bumps
[@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node)
from 24.3.0 to 24.8.1.
Commits: see the [full diff](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node) in the compare view.

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-10-20 15:11:36 -07:00
dependabot[bot]
ec364499f5
chore(ui-deps): bump @tailwindcss/postcss from 4.1.6 to 4.1.14 in /llama_stack/ui (#3850)
Bumps
[@tailwindcss/postcss](https://github.com/tailwindlabs/tailwindcss/tree/HEAD/packages/@tailwindcss-postcss)
from 4.1.6 to 4.1.14.
Release notes (sourced from [@tailwindcss/postcss's releases](https://github.com/tailwindlabs/tailwindcss/releases)):

**v4.1.14**

Fixed:
- Handle `'` syntax in ClojureScript when extracting classes (#18888)
- Handle `@variant` inside `@custom-variant` (#18885)
- Merge suggestions when using `@utility` (#18900)
- Ensure that file system watchers created when using the CLI are always cleaned up (#18905)
- Do not generate `grid-column` utilities when configuring `grid-column-start` or `grid-column-end` (#18907)
- Do not generate `grid-row` utilities when configuring `grid-row-start` or `grid-row-end` (#18907)
- Prevent duplicate CSS when overwriting a static utility with a theme key (#18056)
- Show Lightning CSS warnings (if any) when optimizing/minifying (#18918)
- Use `default` export condition for `@tailwindcss/vite` (#18948)
- Re-throw errors from PostCSS nodes (#18373)
- Detect classes in markdown inline directives (#18967)
- Ensure files with only `@theme` produce no output when built (#18979)
- Support Maud templates when extracting classes (#18988)
- Upgrade: Do not migrate `variant = 'outline'` during upgrades (#18922)
- Upgrade: Show version mismatch (if any) when running upgrade tool (#19028)
- Upgrade: Ensure first class inside `className` is migrated (#19031)
- Upgrade: Migrate classes inside `*ClassName` and `*Class` attributes (#19031)

**v4.1.13**

Changed:
- Drop warning from browser build (#18731)
- Drop exact duplicate declarations when emitting CSS (#18809)

Fixed:
- Don't transition `visibility` when using `transition` (#18795)
- Discard matched variants with unknown named values (#18799)
- Discard matched variants with non-string values (#18799)
- Show suggestions for known `matchVariant` values (#18798)
- Replace deprecated `clip` with `clip-path` in `sr-only` (#18769)
- Hide internal fields from completions in `matchUtilities` (#18820)
- Ignore `.vercel` folders by default (can be overridden by `@source …` rules) (#18855)
- Consider variants starting with `@-` to be invalid (e.g. `@-2xl:flex`) (#18869)
- Do not allow custom variants to start or end with a `-` or `_` (#18867, #18872)
- Upgrade: Migrate `aria` theme keys to `@custom-variant` (#18815)
- Upgrade: Migrate `data` theme keys to `@custom-variant` (#18816)
- Upgrade: Migrate `supports` theme keys to `@custom-variant` (#18817)

**v4.1.12**

Fixed:
- Don't consider the global important state in `@apply` (#18404)
- Add missing suggestions for `flex-<number>` utilities (#18642)
- Fix trailing `)` from interfering with extraction in Clojure keywords (#18345)
- Detect classes inside Elixir charlist, word list, and string sigils (#18432)
- Track source locations through `@plugin` and `@config` (#18345)

... (truncated)
Commits:
- `b67cbcf` Prepare v4.1.14 release (#19037)
- `b497e1e` Add `Upgrading from Tailwind CSS v…` when running upgrade tool (#19026)
- `210575a` Update dedent 1.6.0 → 1.7.0 (minor) (#19010)
- `d0f7f82` Add plugin option documentation to the postcss plugin readme (#18940)
- `5b8136e` Re-throw errors from PostCSS nodes (#18373)
- `1334c99` Prepare v4.1.13 release (#18868)
- `6791e81` Prepare v4.1.12 release (#18728)
- `4923042` Allow users to disable url rewriting in the PostCSS plugin (#18321)
- `88b9f15` Center the dropdown icon added to an input with a paired datalist in Chrome (...
- `9169d73` update READMEs
- Additional commits viewable in the [compare view](https://github.com/tailwindlabs/tailwindcss/commits/v4.1.14/packages/@tailwindcss-postcss)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-10-20 15:11:24 -07:00
dependabot[bot]
6a74894e22
chore(python-deps): bump fastapi from 0.116.1 to 0.119.0 (#3845)
Bumps [fastapi](https://github.com/fastapi/fastapi) from 0.116.1 to 0.119.0.

Release notes (sourced from [fastapi's releases](https://github.com/fastapi/fastapi/releases)):

**0.119.0**

FastAPI now (temporarily) supports both Pydantic v2 models and `pydantic.v1` models at the same time in the same app, to make it easier for any FastAPI apps still using Pydantic v1 to gradually but quickly **migrate to Pydantic v2**.

```python
from fastapi import FastAPI
from pydantic import BaseModel as BaseModelV2
from pydantic.v1 import BaseModel


class Item(BaseModel):
    name: str
    description: str | None = None


class ItemV2(BaseModelV2):
    title: str
    summary: str | None = None


app = FastAPI()


@app.post("/items/", response_model=ItemV2)
def create_item(item: Item):
    return {"title": item.name, "summary": item.description}
```

Adding this feature was a big effort with the main objective of making it easier for the few applications still stuck in Pydantic v1 to migrate to Pydantic v2.

And with this, support for **Pydantic v1 is now deprecated** and will be **removed** from FastAPI in a future version soon.

**Note**: have in mind that the Pydantic team already stopped supporting Pydantic v1 for recent versions of Python, starting with Python 3.14.

You can read in the docs more about how to [Migrate from Pydantic v1 to Pydantic v2](https://fastapi.tiangolo.com/how-to/migrate-from-pydantic-v1-to-pydantic-v2/).

Features:
- Add support for `from pydantic.v1 import BaseModel`, mixed Pydantic v1 and v2 models in the same app. PR #14168 by @tiangolo.

**0.118.3**

Upgrades:
- ⬆️ Add support for Python 3.14. PR #14165 by @svlandeg.

**0.118.2**

Fixes:
- 🐛 Fix tagged discriminated union not recognized as body field. PR #12942 by @frankie567.

Internal:

... (truncated)
Commits:
- `2e721e1` 🔖 Release version 0.119.0
- `fc7a068` 📝 Update release notes
- `3a3879b` 📝 Update release notes
- `d34918a` Add support for `from pydantic.v1 import BaseModel`, mixed Pydantic v1 and ...
- `352dbef` 🔖 Release version 0.118.3
- `96e7d6e` 📝 Update release notes
- `3611c3f` ⬆️ Add support for Python 3.14 (#14165)
- `942fce3` 🔖 Release version 0.118.2
- `13b067c` 📝 Update release notes
- `185cecd` 🐛 Fix tagged discriminated union not recognized as body field (#12942)
- Additional commits viewable in the [compare view](https://github.com/fastapi/fastapi/compare/0.116.1...0.119.0)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-10-20 15:11:11 -07:00
dependabot[bot]
5aafce4ff3
chore(python-deps): bump weaviate-client from 4.16.9 to 4.17.0 (#3844)
Bumps
[weaviate-client](https://github.com/weaviate/weaviate-python-client)
from 4.16.9 to 4.17.0.
Release notes (sourced from [weaviate-client's releases](https://github.com/weaviate/weaviate-python-client/releases)):

**v4.16.10**

What's Changed:
- Add uncompressed quantitizer factory by @dirkkul (weaviate/weaviate-python-client#1800)
- Add support for groups by @dirkkul (weaviate/weaviate-python-client#1778)
- feat: add overwrite_alias to backup restore by @bevzzz (weaviate/weaviate-python-client#1808)
- Add Multi2vec-aws and text2vec-morph by @dirkkul (weaviate/weaviate-python-client#1820)
- Add support for exists on aliases by @jfrancoa (weaviate/weaviate-python-client#1813)
- Add note re GPT4All deprecation by @databyjp (weaviate/weaviate-python-client#1825)
- Update setup.cfg with min weaviate agents version by @cdpierse (weaviate/weaviate-python-client#1826)

**Full Changelog**: https://github.com/weaviate/weaviate-python-client/compare/v4.16.9...v4.16.10
Changelog (sourced from [weaviate-client's changelog](https://github.com/weaviate/weaviate-python-client/blob/main/docs/changelog.rst)):

**Version 4.17.0** — this minor version includes:
- Remove support for Weaviate versions < 1.27. Please update your Weaviate instances.
- Support for new 1.33 features:
  - OIDC group support in RBAC
  - Uncompressed quantizer
  - ContainsNone and Not filter operators
- Add support for `verbosity` and `reasoning effort` for the generative-openai module
- Add `alias.exists` method
- Add multi2vec-aws and text2vec-morph modules
- Add support for max_tokens for the generative-aws module
- Fix weaviate client installation with other packages depending on grpc-health-checking

**Version 4.16.10** — this patch version includes:
- Addition of a helper to create an uncompressed quantizer for use when not using default compression
- Support for the `overwrite_alias` option to backup create/restore
- Support for OIDC groups
- Addition of the `multi2vec-aws` and `text2vec-morph` modules
- Support for the `alias.exists` method
- Update to the `weaviate-agents-client` dependency for the GA release of agents
Commits:
- `7acf5c0` Merge pull request #1838 from weaviate/fix_tests
- `960559d` Remove unneeded version checks
- `7cc1861` Merge pull request #1837 from weaviate/changelog_417
- `3e124e9` Small cleanup in version checking
- `e1859f1` Add changelog for 4.17.0
- `1e71c78` Merge pull request #1827 from weaviate/gen_openai_params
- `9a4bedf` Fix enum selection
- `033542f` Merge pull request #1824 from weaviate/dependabot/pip/pydoclint-0.7.3
- `158889e` Merge pull request #1823 from weaviate/dependabot/pip/polars-gte-0.20.26-and-...
- `65191bb` Merge branch 'dev/1.33'
- Additional commits viewable in the [compare view](https://github.com/weaviate/weaviate-python-client/compare/v4.16.9...v4.17.0)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-10-20 15:10:31 -07:00
ehhuang
5678c25b9d
chore: remove dead code (#3863)
# What does this PR do?


## Test Plan
2025-10-20 15:04:57 -07:00
dependabot[bot]
7294385df3
chore(github-deps): bump actions/setup-node from 5.0.0 to 6.0.0 (#3843)
Bumps [actions/setup-node](https://github.com/actions/setup-node) from
5.0.0 to 6.0.0.
Release notes (sourced from [actions/setup-node's releases](https://github.com/actions/setup-node/releases)):

**v6.0.0**

Breaking Changes:
- Limit automatic caching to npm, update workflows and documentation by @priyagupta108 (actions/setup-node#1374)

Dependency Upgrades:
- Upgrade ts-jest from 29.1.2 to 29.4.1 and document breaking changes in v5 by @dependabot[bot] (#1336)
- Upgrade prettier from 2.8.8 to 3.6.2 by @dependabot[bot] (#1334)
- Upgrade actions/publish-action from 0.3.0 to 0.4.0 by @dependabot[bot] (#1362)

**Full Changelog**: https://github.com/actions/setup-node/compare/v5...v6.0.0
Commits:
- `2028fbc` Limit automatic caching to npm, update workflows and documentation (#1374)
- `1342781` Bump actions/publish-action from 0.3.0 to 0.4.0 (#1362)
- `89d709d` Bump prettier from 2.8.8 to 3.6.2 (#1334)
- `cd2651c` Bump ts-jest from 29.1.2 to 29.4.1 (#1336)
- See the full diff in the compare view (a0853c2454...2028fbc5c2)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-10-20 14:59:39 -07:00
dependabot[bot]
8943335e0b
chore(github-deps): bump astral-sh/setup-uv from 7.0.0 to 7.1.0 (#3842)
Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from
7.0.0 to 7.1.0.
Release notes (sourced from [astral-sh/setup-uv's releases](https://github.com/astral-sh/setup-uv/releases)):

**v7.1.0 🌈 Support all the use cases**

Support all the use cases!!! ... well, that we know of.

This release adds support for some use cases that most users don't encounter but are useful for e.g. people running Gitea.

The input `resolution-strategy` lets you use the lowest possible version of uv from a version range. Useful if you want to test your tool with different versions of uv.

If you use `activate-environment`, the path to the activated venv is now also exposed under the output `venv`.

Downloaded python installations can now also be uploaded to the GitHub Actions cache backend. Useful if you are running in `act` and have configured your own backend and don't want to download python again, and again over a slow internet connection.

Finally, the path to installed python interpreters is now added to the `PATH` on Windows.

🚀 Enhancements:
- Add resolution-strategy input to support oldest compatible version selection @copilot-swe-agent[bot] (#631)
- Add value of UV_PYTHON_INSTALL_DIR to path @eifinger (#628)
- Set output venv when activate-environment is used @eifinger (#627)
- Cache python installs @merlinz01 (#621)

🧰 Maintenance:
- Add copilot-instructions.md @eifinger (#630)
- chore: update known checksums for 0.9.2 @github-actions[bot] (#626)
- chore: update known checksums for 0.9.1 @github-actions[bot] (#625)
- Fall back to PR for updating known versions @eifinger (#623)

📚 Documentation:
- Split up documentation @eifinger (#632)

⬆️ Dependency updates:
- Bump deps @eifinger (#633)
- Bump github/codeql-action from 3.30.6 to 4.30.7 @dependabot[bot] (#614)
Commits:
- `3259c62` Bump deps (#633)
- `bf8e8ed` Split up documentation (#632)
- `9c6b5e9` Add resolution-strategy input to support oldest compatible version selection ...
- `a5129e9` Add copilot-instructions.md (#630)
- `d18bcc7` Add value of UV_PYTHON_INSTALL_DIR to path (#628)
- `bd1f875` Set output venv when activate-environment is used (#627)
- `1a91c38` chore: update known checksums for 0.9.2 (#626)
- `c79f606` chore: update known checksums for 0.9.1 (#625)
- `e0249f1` Fall back to PR for updating known versions (#623)
- `6d2eb15` Cache python installs (#621)
- Additional commits viewable in the compare view (eb1897b8dc...3259c6206f)
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=astral-sh/setup-uv&package-manager=github_actions&previous-version=7.0.0&new-version=7.1.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-10-20 14:59:35 -07:00
dependabot[bot]
e7f4ddcc86
chore(github-deps): bump actions/checkout from 4.2.2 to 5.0.0 (#3841)
Bumps [actions/checkout](https://github.com/actions/checkout) from 4.2.2
to 5.0.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/actions/checkout/releases">actions/checkout's
releases</a>.</em></p>
<blockquote>
<h2>v5.0.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Update actions checkout to use node 24 by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2226">actions/checkout#2226</a></li>
<li>Prepare v5.0.0 release by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2238">actions/checkout#2238</a></li>
</ul>
<h2>⚠️ Minimum Compatible Runner Version</h2>
<p><strong>v2.327.1</strong><br />
<a
href="https://github.com/actions/runner/releases/tag/v2.327.1">Release
Notes</a></p>
<p>Make sure your runner is updated to this version or newer to use this
release.</p>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/checkout/compare/v4...v5.0.0">https://github.com/actions/checkout/compare/v4...v5.0.0</a></p>
<h2>v4.3.0</h2>
<h2>What's Changed</h2>
<ul>
<li>docs: update README.md by <a
href="https://github.com/motss"><code>@​motss</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1971">actions/checkout#1971</a></li>
<li>Add internal repos for checking out multiple repositories by <a
href="https://github.com/mouismail"><code>@​mouismail</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1977">actions/checkout#1977</a></li>
<li>Documentation update - add recommended permissions to Readme by <a
href="https://github.com/benwells"><code>@​benwells</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2043">actions/checkout#2043</a></li>
<li>Adjust positioning of user email note and permissions heading by <a
href="https://github.com/joshmgross"><code>@​joshmgross</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2044">actions/checkout#2044</a></li>
<li>Update README.md by <a
href="https://github.com/nebuk89"><code>@​nebuk89</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2194">actions/checkout#2194</a></li>
<li>Update CODEOWNERS for actions by <a
href="https://github.com/TingluoHuang"><code>@​TingluoHuang</code></a>
in <a
href="https://redirect.github.com/actions/checkout/pull/2224">actions/checkout#2224</a></li>
<li>Update package dependencies by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2236">actions/checkout#2236</a></li>
<li>Prepare release v4.3.0 by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2237">actions/checkout#2237</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/motss"><code>@​motss</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/checkout/pull/1971">actions/checkout#1971</a></li>
<li><a href="https://github.com/mouismail"><code>@​mouismail</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/checkout/pull/1977">actions/checkout#1977</a></li>
<li><a href="https://github.com/benwells"><code>@​benwells</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/checkout/pull/2043">actions/checkout#2043</a></li>
<li><a href="https://github.com/nebuk89"><code>@​nebuk89</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/checkout/pull/2194">actions/checkout#2194</a></li>
<li><a href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/checkout/pull/2236">actions/checkout#2236</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/checkout/compare/v4...v4.3.0">https://github.com/actions/checkout/compare/v4...v4.3.0</a></p>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/actions/checkout/blob/main/CHANGELOG.md">actions/checkout's
changelog</a>.</em></p>
<blockquote>
<h1>Changelog</h1>
<h2>V5.0.0</h2>
<ul>
<li>Update actions checkout to use node 24 by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2226">actions/checkout#2226</a></li>
</ul>
<h2>V4.3.0</h2>
<ul>
<li>docs: update README.md by <a
href="https://github.com/motss"><code>@​motss</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1971">actions/checkout#1971</a></li>
<li>Add internal repos for checking out multiple repositories by <a
href="https://github.com/mouismail"><code>@​mouismail</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1977">actions/checkout#1977</a></li>
<li>Documentation update - add recommended permissions to Readme by <a
href="https://github.com/benwells"><code>@​benwells</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2043">actions/checkout#2043</a></li>
<li>Adjust positioning of user email note and permissions heading by <a
href="https://github.com/joshmgross"><code>@​joshmgross</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2044">actions/checkout#2044</a></li>
<li>Update README.md by <a
href="https://github.com/nebuk89"><code>@​nebuk89</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2194">actions/checkout#2194</a></li>
<li>Update CODEOWNERS for actions by <a
href="https://github.com/TingluoHuang"><code>@​TingluoHuang</code></a>
in <a
href="https://redirect.github.com/actions/checkout/pull/2224">actions/checkout#2224</a></li>
<li>Update package dependencies by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2236">actions/checkout#2236</a></li>
</ul>
<h2>v4.2.2</h2>
<ul>
<li><code>url-helper.ts</code> now leverages well-known environment
variables by <a href="https://github.com/jww3"><code>@​jww3</code></a>
in <a
href="https://redirect.github.com/actions/checkout/pull/1941">actions/checkout#1941</a></li>
<li>Expand unit test coverage for <code>isGhes</code> by <a
href="https://github.com/jww3"><code>@​jww3</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1946">actions/checkout#1946</a></li>
</ul>
<h2>v4.2.1</h2>
<ul>
<li>Check out other refs/* by commit if provided, fall back to ref by <a
href="https://github.com/orhantoy"><code>@​orhantoy</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1924">actions/checkout#1924</a></li>
</ul>
<h2>v4.2.0</h2>
<ul>
<li>Add Ref and Commit outputs by <a
href="https://github.com/lucacome"><code>@​lucacome</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1180">actions/checkout#1180</a></li>
<li>Dependency updates by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a>- <a
href="https://redirect.github.com/actions/checkout/pull/1777">actions/checkout#1777</a>,
<a
href="https://redirect.github.com/actions/checkout/pull/1872">actions/checkout#1872</a></li>
</ul>
<h2>v4.1.7</h2>
<ul>
<li>Bump the minor-npm-dependencies group across 1 directory with 4
updates by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1739">actions/checkout#1739</a></li>
<li>Bump actions/checkout from 3 to 4 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1697">actions/checkout#1697</a></li>
<li>Check out other refs/* by commit by <a
href="https://github.com/orhantoy"><code>@​orhantoy</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1774">actions/checkout#1774</a></li>
<li>Pin actions/checkout's own workflows to a known, good, stable
version. by <a href="https://github.com/jww3"><code>@​jww3</code></a> in
<a
href="https://redirect.github.com/actions/checkout/pull/1776">actions/checkout#1776</a></li>
</ul>
<h2>v4.1.6</h2>
<ul>
<li>Check platform to set archive extension appropriately by <a
href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in
<a
href="https://redirect.github.com/actions/checkout/pull/1732">actions/checkout#1732</a></li>
</ul>
<h2>v4.1.5</h2>
<ul>
<li>Update NPM dependencies by <a
href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in
<a
href="https://redirect.github.com/actions/checkout/pull/1703">actions/checkout#1703</a></li>
<li>Bump github/codeql-action from 2 to 3 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1694">actions/checkout#1694</a></li>
<li>Bump actions/setup-node from 1 to 4 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1696">actions/checkout#1696</a></li>
<li>Bump actions/upload-artifact from 2 to 4 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1695">actions/checkout#1695</a></li>
<li>README: Suggest <code>user.email</code> to be
<code>41898282+github-actions[bot]@users.noreply.github.com</code> by <a
href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in
<a
href="https://redirect.github.com/actions/checkout/pull/1707">actions/checkout#1707</a></li>
</ul>
<h2>v4.1.4</h2>
<ul>
<li>Disable <code>extensions.worktreeConfig</code> when disabling
<code>sparse-checkout</code> by <a
href="https://github.com/jww3"><code>@​jww3</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1692">actions/checkout#1692</a></li>
<li>Add dependabot config by <a
href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in
<a
href="https://redirect.github.com/actions/checkout/pull/1688">actions/checkout#1688</a></li>
<li>Bump the minor-actions-dependencies group with 2 updates by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1693">actions/checkout#1693</a></li>
<li>Bump word-wrap from 1.2.3 to 1.2.5 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1643">actions/checkout#1643</a></li>
</ul>
<h2>v4.1.3</h2>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="08c6903cd8"><code>08c6903</code></a>
Prepare v5.0.0 release (<a
href="https://redirect.github.com/actions/checkout/issues/2238">#2238</a>)</li>
<li><a
href="9f265659d3"><code>9f26565</code></a>
Update actions checkout to use node 24 (<a
href="https://redirect.github.com/actions/checkout/issues/2226">#2226</a>)</li>
<li><a
href="08eba0b27e"><code>08eba0b</code></a>
Prepare release v4.3.0 (<a
href="https://redirect.github.com/actions/checkout/issues/2237">#2237</a>)</li>
<li><a
href="631c7dc4f8"><code>631c7dc</code></a>
Update package dependencies (<a
href="https://redirect.github.com/actions/checkout/issues/2236">#2236</a>)</li>
<li><a
href="8edcb1bdb4"><code>8edcb1b</code></a>
Update CODEOWNERS for actions (<a
href="https://redirect.github.com/actions/checkout/issues/2224">#2224</a>)</li>
<li><a
href="09d2acae67"><code>09d2aca</code></a>
Update README.md (<a
href="https://redirect.github.com/actions/checkout/issues/2194">#2194</a>)</li>
<li><a
href="85e6279cec"><code>85e6279</code></a>
Adjust positioning of user email note and permissions heading (<a
href="https://redirect.github.com/actions/checkout/issues/2044">#2044</a>)</li>
<li><a
href="009b9ae9e4"><code>009b9ae</code></a>
Documentation update - add recommended permissions to Readme (<a
href="https://redirect.github.com/actions/checkout/issues/2043">#2043</a>)</li>
<li><a
href="cbb722410c"><code>cbb7224</code></a>
Update README.md (<a
href="https://redirect.github.com/actions/checkout/issues/1977">#1977</a>)</li>
<li><a
href="3b9b8c884f"><code>3b9b8c8</code></a>
docs: update README.md (<a
href="https://redirect.github.com/actions/checkout/issues/1971">#1971</a>)</li>
<li>See full diff in <a
href="https://github.com/actions/checkout/compare/v4.2.2...08c6903cd8c0fde910a37f88322edcfb5dd907a8">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/checkout&package-manager=github_actions&previous-version=4.2.2&new-version=5.0.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-10-20 14:59:28 -07:00
117 changed files with 52193 additions and 4095 deletions

View file

@ -86,10 +86,9 @@ runs:
if: ${{ always() }}
shell: bash
run: |
-      sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
-      distro_name=$(echo "${{ inputs.stack-config }}" | sed 's/^docker://' | sed 's/^server://')
-      stack_container_name="llama-stack-test-$distro_name"
-      sudo docker logs $stack_container_name > docker-${distro_name}-${{ inputs.inference-mode }}.log || true
+      # Ollama logs (if ollama container exists)
+      sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
+      # Note: distro container logs are now dumped in integration-tests.sh before container is removed
- name: Upload logs
if: ${{ always() }}

View file

@ -37,7 +37,7 @@ jobs:
.pre-commit-config.yaml
- name: Set up Node.js
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
with:
node-version: '20'
cache: 'npm'

View file

@ -99,7 +99,7 @@ jobs:
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: ${{ steps.check_author.outputs.pr_number }},
body: `⏳ Running pre-commit hooks on PR #${{ steps.check_author.outputs.pr_number }}...`
body: `⏳ Running [pre-commit hooks](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) on PR #${{ steps.check_author.outputs.pr_number }}...`
});
- name: Checkout PR branch (same-repo)
@ -141,7 +141,7 @@ jobs:
- name: Set up Node.js
if: steps.check_author.outputs.authorized == 'true'
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
with:
node-version: '20'
cache: 'npm'

View file

@ -36,7 +36,7 @@ jobs:
distros: ${{ steps.set-matrix.outputs.distros }}
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Generate Distribution List
id: set-matrix
@ -55,7 +55,7 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Install dependencies
uses: ./.github/actions/setup-runner
@ -79,7 +79,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Install dependencies
uses: ./.github/actions/setup-runner
@ -92,7 +92,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Install dependencies
uses: ./.github/actions/setup-runner

View file

@ -24,7 +24,7 @@ jobs:
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Install uv
uses: astral-sh/setup-uv@eb1897b8dc4b5d5bfe39a428a8f2304605e0983c # v7.0.0
uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0
with:
python-version: ${{ matrix.python-version }}
activate-environment: true

View file

@ -29,7 +29,7 @@ jobs:
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Setup Node.js
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
with:
node-version: ${{ matrix.node-version }}
cache: 'npm'

View file

@ -208,19 +208,6 @@ resources:
type: http
endpoint: post /v1/conversations/{conversation_id}/items
datasets:
models:
list_datasets_response: ListDatasetsResponse
methods:
register: post /v1beta/datasets
retrieve: get /v1beta/datasets/{dataset_id}
list:
endpoint: get /v1beta/datasets
paginated: false
unregister: delete /v1beta/datasets/{dataset_id}
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
inspect:
models:
healthInfo: HealthInfo
@ -521,6 +508,21 @@ resources:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
beta:
subresources:
datasets:
models:
list_datasets_response: ListDatasetsResponse
methods:
register: post /v1beta/datasets
retrieve: get /v1beta/datasets/{dataset_id}
list:
endpoint: get /v1beta/datasets
paginated: false
unregister: delete /v1beta/datasets/{dataset_id}
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
settings:
license: MIT

View file

@ -6440,7 +6440,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -9132,7 +9132,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -9440,7 +9440,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -10203,7 +10203,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -11325,7 +11325,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -12652,7 +12652,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark

View file

@ -19,6 +19,7 @@ Browse that folder to understand available providers and copy a distribution to
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
<Tabs>
<TabItem value="container" label="Building a container">

View file

@ -4,65 +4,24 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient
-vector_db_id = "my_demo_vector_db"
-client = LlamaStackClient(base_url="http://localhost:8321")
+import io, requests
+from openai import OpenAI
-models = client.models.list()
+url="https://www.paulgraham.com/greatwork.html"
+client = OpenAI(base_url="http://localhost:8321/v1/", api_key="none")
-# Select the first LLM and first embedding models
-model_id = next(m for m in models if m.model_type == "llm").identifier
-embedding_model_id = (
-    em := next(m for m in models if m.model_type == "embedding")
-).identifier
-embedding_dimension = em.metadata["embedding_dimension"]
+vs = client.vector_stores.create()
+response = requests.get(url)
+pseudo_file = io.BytesIO(str(response.content).encode('utf-8'))
+uploaded_file = client.files.create(file=(url, pseudo_file, "text/html"), purpose="assistants")
+client.vector_stores.files.create(vector_store_id=vs.id, file_id=uploaded_file.id)
-vector_db = client.vector_dbs.register(
-    vector_db_id=vector_db_id,
-    embedding_model=embedding_model_id,
-    embedding_dimension=embedding_dimension,
-    provider_id="faiss",
-)
-vector_db_id = vector_db.identifier
-source = "https://www.paulgraham.com/greatwork.html"
-print("rag_tool> Ingesting document:", source)
-document = RAGDocument(
-    document_id="document_1",
-    content=source,
-    mime_type="text/html",
-    metadata={},
-)
-client.tool_runtime.rag_tool.insert(
-    documents=[document],
-    vector_db_id=vector_db_id,
-    chunk_size_in_tokens=100,
-)
-agent = Agent(
-    client,
-    model=model_id,
-    instructions="You are a helpful assistant",
-    tools=[
-        {
-            "name": "builtin::rag/knowledge_search",
-            "args": {"vector_db_ids": [vector_db_id]},
-        }
-    ],
+resp = client.responses.create(
+    model="openai/gpt-4o",
+    input="How do you do great work? Use the existing knowledge_search tool.",
+    tools=[{"type": "file_search", "vector_store_ids": [vs.id]}],
+    include=["file_search_call.results"],
)
-prompt = "How do you do great work?"
-print("prompt>", prompt)
-use_stream = True
-response = agent.create_turn(
-    messages=[{"role": "user", "content": prompt}],
-    session_id=agent.create_session("rag_session"),
-    stream=use_stream,
-)
-# Only call `AgentEventLogger().log(response)` for streaming responses.
-if use_stream:
-    for log in AgentEventLogger().log(response):
-        log.print()
-else:
-    print(response)
+print(resp)
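
One caveat worth noting in the new snippet: `str(response.content)` serializes the raw bytes into their `b'...'` repr string before re-encoding, so the uploaded "file" contains escaped bytes rather than the decoded page. A minimal alternative sketch (same endpoints as above; the variable names are illustrative, and it assumes the page's charset is reported correctly in the HTTP headers):

```python
import io

import requests
from openai import OpenAI

url = "https://www.paulgraham.com/greatwork.html"
client = OpenAI(base_url="http://localhost:8321/v1/", api_key="none")

# Decode the body via requests' charset handling instead of str(bytes).
html_text = requests.get(url).text
pseudo_file = io.BytesIO(html_text.encode("utf-8"))

uploaded_file = client.files.create(
    file=(url, pseudo_file, "text/html"),  # (filename, file-like object, MIME type)
    purpose="assistants",
)
```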

View file

@ -35,103 +35,51 @@ OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run star
#### Step 3: Run the demo
Now open up a new terminal and copy the following script into a file named `demo_script.py`.
-```python title="demo_script.py"
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
+```python
+import io, requests
+from openai import OpenAI
-from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient
+url="https://www.paulgraham.com/greatwork.html"
+client = OpenAI(base_url="http://localhost:8321/v1/", api_key="none")
-vector_db_id = "my_demo_vector_db"
-client = LlamaStackClient(base_url="http://localhost:8321")
+vs = client.vector_stores.create()
+response = requests.get(url)
+pseudo_file = io.BytesIO(str(response.content).encode('utf-8'))
+uploaded_file = client.files.create(file=(url, pseudo_file, "text/html"), purpose="assistants")
+client.vector_stores.files.create(vector_store_id=vs.id, file_id=uploaded_file.id)
-models = client.models.list()
-# Select the first LLM and first embedding models
-model_id = next(m for m in models if m.model_type == "llm").identifier
-embedding_model_id = (
-    em := next(m for m in models if m.model_type == "embedding")
-).identifier
-embedding_dimension = em.metadata["embedding_dimension"]
-vector_db = client.vector_dbs.register(
-    vector_db_id=vector_db_id,
-    embedding_model=embedding_model_id,
-    embedding_dimension=embedding_dimension,
-    provider_id="faiss",
-)
-vector_db_id = vector_db.identifier
-source = "https://www.paulgraham.com/greatwork.html"
-print("rag_tool> Ingesting document:", source)
-document = RAGDocument(
-    document_id="document_1",
-    content=source,
-    mime_type="text/html",
-    metadata={},
-)
-client.tool_runtime.rag_tool.insert(
-    documents=[document],
-    vector_db_id=vector_db_id,
-    chunk_size_in_tokens=100,
-)
-agent = Agent(
-    client,
-    model=model_id,
-    instructions="You are a helpful assistant",
-    tools=[
-        {
-            "name": "builtin::rag/knowledge_search",
-            "args": {"vector_db_ids": [vector_db_id]},
-        }
-    ],
+resp = client.responses.create(
+    model="openai/gpt-4o",
+    input="How do you do great work? Use the existing knowledge_search tool.",
+    tools=[{"type": "file_search", "vector_store_ids": [vs.id]}],
+    include=["file_search_call.results"],
)
-prompt = "How do you do great work?"
-print("prompt>", prompt)
-use_stream = True
-response = agent.create_turn(
-    messages=[{"role": "user", "content": prompt}],
-    session_id=agent.create_session("rag_session"),
-    stream=use_stream,
-)
-# Only call `AgentEventLogger().log(response)` for streaming responses.
-if use_stream:
-    for log in AgentEventLogger().log(response):
-        log.print()
-else:
-    print(response)
```
We will use `uv` to run the script
```
uv run --with llama-stack-client,fire,requests demo_script.py
```
And you should see output like below.
+```python
+>print(resp.output[1].content[0].text)
+To do great work, consider the following principles:
+1. **Follow Your Interests**: Engage in work that genuinely excites you. If you find an area intriguing, pursue it without being overly concerned about external pressures or norms. You should create things that you would want for yourself, as this often aligns with what others in your circle might want too.
+2. **Work Hard on Ambitious Projects**: Ambition is vital, but it should be tempered by genuine interest. Instead of detailed planning for the future, focus on exciting projects that keep your options open. This approach, known as "staying upwind," allows for adaptability and can lead to unforeseen achievements.
+3. **Choose Quality Colleagues**: Collaborating with talented colleagues can significantly affect your own work. Seek out individuals who offer surprising insights and whom you admire. The presence of good colleagues can elevate the quality of your work and inspire you.
+4. **Maintain High Morale**: Your attitude towards work and life affects your performance. Cultivating optimism and viewing yourself as lucky rather than victimized can boost your productivity. It's essential to care for your physical health as well since it directly impacts your mental faculties and morale.
+5. **Be Consistent**: Great work often comes from cumulative effort. Daily progress, even in small amounts, can result in substantial achievements over time. Emphasize consistency and make the work engaging, as this reduces the perceived burden of hard labor.
+6. **Embrace Curiosity**: Curiosity is a driving force that can guide you in selecting fields of interest, pushing you to explore uncharted territories. Allow it to shape your work and continually seek knowledge and insights.
+By focusing on these aspects, you can create an environment conducive to great work and personal fulfillment.
+```
-```
-rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html
-prompt> How do you do great work?
-inference> [knowledge_search(query="What is the key to doing great work")]
-tool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}
-tool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n', type='text'), TextContentItem(text="Result 1:\nDocument_id:docum\nContent: work. Doing great work means doing something important\nso well that you expand people's ideas of what's possible. But\nthere's no threshold for importance. It's a matter of degree, and\noften hard to judge at the time anyway.\n", type='text'), TextContentItem(text="Result 2:\nDocument_id:docum\nContent: work. Doing great work means doing something important\nso well that you expand people's ideas of what's possible. But\nthere's no threshold for importance. It's a matter of degree, and\noften hard to judge at the time anyway.\n", type='text'), TextContentItem(text="Result 3:\nDocument_id:docum\nContent: work. Doing great work means doing something important\nso well that you expand people's ideas of what's possible. But\nthere's no threshold for importance. It's a matter of degree, and\noften hard to judge at the time anyway.\n", type='text'), TextContentItem(text="Result 4:\nDocument_id:docum\nContent: work. Doing great work means doing something important\nso well that you expand people's ideas of what's possible. But\nthere's no threshold for importance. It's a matter of degree, and\noften hard to judge at the time anyway.\n", type='text'), TextContentItem(text="Result 5:\nDocument_id:docum\nContent: work. Doing great work means doing something important\nso well that you expand people's ideas of what's possible. But\nthere's no threshold for importance. It's a matter of degree, and\noften hard to judge at the time anyway.\n", type='text'), TextContentItem(text='END of knowledge_search tool results.\n', type='text')]
-inference> Based on the search results, it seems that doing great work means doing something important so well that you expand people's ideas of what's possible. However, there is no clear threshold for importance, and it can be difficult to judge at the time.
-To further clarify, I would suggest that doing great work involves:
-* Completing tasks with high quality and attention to detail
-* Expanding on existing knowledge or ideas
-* Making a positive impact on others through your work
-* Striving for excellence and continuous improvement
-Ultimately, great work is about making a meaningful contribution and leaving a lasting impression.
-```
Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳
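
If the response comes back without any file_search results, the vector store may still be indexing when the query runs. A small sketch that waits for ingestion to finish before querying; it reuses `client` and `vs` from the script above and assumes the server reports OpenAI-style per-file statuses:

```python
import time

def wait_for_ingestion(client, vector_store_id, timeout_s=60):
    # Poll until every file attached to the vector store has been processed.
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        files = client.vector_stores.files.list(vector_store_id=vector_store_id)
        if files.data and all(f.status == "completed" for f in files.data):
            return
        time.sleep(1)
    raise TimeoutError("vector store files did not finish indexing")

wait_for_ingestion(client, vs.id)
```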
:::tip HuggingFace access

View file

@ -32,7 +32,6 @@ Commands:
scoring_functions Manage scoring functions.
shields Manage safety shield services.
toolgroups Manage available tool groups.
vector_dbs Manage vector databases.
```
### `llama-stack-client configure`
@ -211,53 +210,6 @@ Unregister a model from distribution endpoint
llama-stack-client models unregister <model_id>
```
## Vector DB Management
Manage vector databases.
### `llama-stack-client vector_dbs list`
Show available vector dbs on distribution endpoint
```bash
llama-stack-client vector_dbs list
```
```
┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ identifier ┃ provider_id ┃ provider_resource_id ┃ vector_db_type ┃ params ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ my_demo_vector_db │ faiss │ my_demo_vector_db │ │ embedding_dimension: 768 │
│ │ │ │ │ embedding_model: nomic-embed-text-v1.5 │
│ │ │ │ │ type: vector_db │
│ │ │ │ │ │
└──────────────────────────┴─────────────┴──────────────────────────┴────────────────┴───────────────────────────────────┘
```
### `llama-stack-client vector_dbs register`
Create a new vector db
```bash
llama-stack-client vector_dbs register <vector-db-id> [--provider-id <provider-id>] [--provider-vector-db-id <provider-vector-db-id>] [--embedding-model <embedding-model>] [--embedding-dimension <embedding-dimension>]
```
Required arguments:
- `VECTOR_DB_ID`: Vector DB ID
Optional arguments:
- `--provider-id`: Provider ID for the vector db
- `--provider-vector-db-id`: Provider's vector db ID
- `--embedding-model`: Embedding model to use. Default: `nomic-embed-text-v1.5`
- `--embedding-dimension`: Dimension of embeddings. Default: 768
### `llama-stack-client vector_dbs unregister`
Delete a vector db
```bash
llama-stack-client vector_dbs unregister <vector-db-id>
```
Required arguments:
- `VECTOR_DB_ID`: Vector DB ID
## Shield Management
Manage safety shield services.
### `llama-stack-client shields list`

File diff suppressed because one or more lines are too long

View file

@ -126,17 +126,31 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "J2kGed0R5PSf",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"collapsed": true,
"id": "J2kGed0R5PSf",
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[2mUsing Python 3.12.12 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/test\u001b[0m\n",
"\u001b[2mAudited \u001b[1m52 packages\u001b[0m \u001b[2min 1.56s\u001b[0m\u001b[0m\n",
"\u001b[2mUsing Python 3.12.12 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/test\u001b[0m\n",
"\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 122ms\u001b[0m\u001b[0m\n",
"\u001b[2mUsing Python 3.12.12 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/test\u001b[0m\n",
"\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 197ms\u001b[0m\u001b[0m\n",
"\u001b[2mUsing Python 3.12.12 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/test\u001b[0m\n",
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 11ms\u001b[0m\u001b[0m\n"
]
}
],
"source": [
"import os\n",
"import subprocess\n",
@ -150,7 +164,7 @@
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\n",
" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\",\n",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
@ -200,7 +214,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 2,
"id": "f779283d",
"metadata": {},
"outputs": [
@ -208,8 +222,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Starting Llama Stack server with PID: 787100\n",
"Waiting for server to start\n",
"Starting Llama Stack server with PID: 20778\n",
"Waiting for server to start........\n",
"Server is ready!\n"
]
}
@ -229,65 +243,84 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 3,
"id": "7da71011",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n",
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/files \"HTTP/1.1 200 OK\"\n",
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector_stores \"HTTP/1.1 200 OK\"\n",
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/conversations \"HTTP/1.1 200 OK\"\n",
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/responses \"HTTP/1.1 200 OK\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n",
"prompt> How do you do great work?\n",
"\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n",
"\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[30m\u001b[0m"
"🤔 Doing great work involves a combination of skills, habits, and mindsets. Here are some key principles:\n",
"\n",
"1. **Set Clear Goals**: Start with a clear vision of what you want to achieve. Define specific, measurable, achievable, relevant, and time-bound (SMART) goals.\n",
"\n",
"2. **Plan and Prioritize**: Break your goals into smaller, manageable tasks. Prioritize these tasks based on their importance and urgency.\n",
"\n",
"3. **Focus on Quality**: Aim for high-quality outcomes rather than just finishing tasks. Pay attention to detail, and ensure your work meets or exceeds standards.\n",
"\n",
"4. **Stay Organized**: Keep your workspace, both physical and digital, organized to help you stay focused and efficient.\n",
"\n",
"5. **Manage Your Time**: Use time management techniques such as the Pomodoro Technique, time blocking, or the Eisenhower Box to maximize productivity.\n",
"\n",
"6. **Seek Feedback and Learn**: Regularly seek feedback from peers, mentors, or supervisors. Use constructive criticism to improve continuously.\n",
"\n",
"7. **Innovate and Improve**: Look for ways to improve processes or introduce new ideas. Be open to change and willing to adapt.\n",
"\n",
"8. **Stay Motivated and Persistent**: Keep your end goals in mind to stay motivated. Overcome setbacks with resilience and persistence.\n",
"\n",
"9. **Balance and Rest**: Ensure you maintain a healthy work-life balance. Take breaks and manage stress to sustain long-term productivity.\n",
"\n",
"10. **Reflect and Adjust**: Regularly assess your progress and adjust your strategies as needed. Reflect on what works well and what doesn't.\n",
"\n",
"By incorporating these elements, you can consistently produce high-quality work and achieve excellence in your endeavors.\n"
]
}
],
"source": [
"from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
"import requests\n",
"\n",
"vector_db_id = \"my_demo_vector_db\"\n",
"vector_store_id = \"my_demo_vector_db\"\n",
"client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
"\n",
"models = client.models.list()\n",
"\n",
"# Select the first ollama and first ollama's embedding model\n",
"model_id = next(m for m in models if m.model_type == \"llm\" and m.provider_id == \"ollama\").identifier\n",
"embedding_model = next(m for m in models if m.model_type == \"embedding\" and m.provider_id == \"ollama\")\n",
"embedding_model_id = embedding_model.identifier\n",
"embedding_dimension = embedding_model.metadata[\"embedding_dimension\"]\n",
"\n",
"_ = client.vector_dbs.register(\n",
" vector_db_id=vector_db_id,\n",
" embedding_model=embedding_model_id,\n",
" embedding_dimension=embedding_dimension,\n",
" provider_id=\"faiss\",\n",
")\n",
"\n",
"source = \"https://www.paulgraham.com/greatwork.html\"\n",
"print(\"rag_tool> Ingesting document:\", source)\n",
"document = RAGDocument(\n",
" document_id=\"document_1\",\n",
" content=source,\n",
" mime_type=\"text/html\",\n",
" metadata={},\n",
"response = requests.get(source)\n",
"file = client.files.create(\n",
" file=response.content,\n",
" purpose='assistants'\n",
")\n",
"client.tool_runtime.rag_tool.insert(\n",
" documents=[document],\n",
" vector_db_id=vector_db_id,\n",
" chunk_size_in_tokens=50,\n",
"vector_store = client.vector_stores.create(\n",
" name=vector_store_id,\n",
" file_ids=[file.id],\n",
")\n",
"\n",
"agent = Agent(\n",
" client,\n",
" model=model_id,\n",
" instructions=\"You are a helpful assistant\",\n",
" tools=[\n",
" {\n",
" \"name\": \"builtin::rag/knowledge_search\",\n",
" \"args\": {\"vector_db_ids\": [vector_db_id]},\n",
" \"type\": \"file_search\",\n",
" \"vector_store_ids\": [vector_store_id],\n",
" }\n",
" ],\n",
")\n",
@ -302,7 +335,7 @@
")\n",
"\n",
"for log in AgentEventLogger().log(response):\n",
" log.print()"
" print(log, end=\"\")"
]
},
{
@ -344,7 +377,7 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@ -358,7 +391,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.12.12"
}
},
"nbformat": 4,

View file

@ -5547,7 +5547,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -5798,7 +5798,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",

View file

@ -4114,7 +4114,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -4303,7 +4303,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark

View file

@ -1850,7 +1850,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -3983,7 +3983,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",

View file

@ -1320,7 +1320,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -2927,7 +2927,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark

View file

@ -6800,7 +6800,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -10205,7 +10205,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -10687,7 +10687,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -11740,7 +11740,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",

View file

@ -5227,7 +5227,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -7919,7 +7919,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -8227,7 +8227,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -8990,7 +8990,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark

View file

@ -8472,7 +8472,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -11877,7 +11877,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -12359,7 +12359,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -13412,7 +13412,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -14959,7 +14959,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -16704,7 +16704,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",

View file

@ -6440,7 +6440,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -9132,7 +9132,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -9440,7 +9440,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -10203,7 +10203,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -11325,7 +11325,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -12652,7 +12652,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark

View file

@ -121,7 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
models = "models"
shields = "shields"
vector_dbs = "vector_dbs" # only used for routing
vector_stores = "vector_stores" # only used for routing table
datasets = "datasets"
scoring_functions = "scoring_functions"
benchmarks = "benchmarks"

View file

@ -13,7 +13,7 @@ from pydantic import BaseModel, Field
class ResourceType(StrEnum):
model = "model"
shield = "shield"
vector_db = "vector_db"
vector_store = "vector_store"
dataset = "dataset"
scoring_function = "scoring_function"
benchmark = "benchmark"
@ -34,4 +34,4 @@ class Resource(BaseModel):
provider_id: str = Field(description="ID of the provider that owns this resource")
type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)")
type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)")
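
Because `ResourceType` is a string enum, the rename is observable anywhere resource types are serialized or compared as plain strings. A minimal sketch of the behavioral consequence (enum values taken from the diff above):

```python
from enum import StrEnum  # Python 3.11+

class ResourceType(StrEnum):
    model = "model"
    shield = "shield"
    vector_store = "vector_store"  # previously "vector_db"

rt = ResourceType.vector_store
assert rt == "vector_store"  # matches the new wire value
assert rt != "vector_db"     # string checks against the old value must be updated
```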

View file

@ -1,93 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Literal, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.schema_utils import json_schema_type
@json_schema_type
class VectorDB(Resource):
"""Vector database resource for storing and querying vector embeddings.
:param type: Type of resource, always 'vector_db' for vector databases
:param embedding_model: Name of the embedding model to use for vector generation
:param embedding_dimension: Dimension of the embedding vectors
"""
type: Literal[ResourceType.vector_db] = ResourceType.vector_db
embedding_model: str
embedding_dimension: int
vector_db_name: str | None = None
@property
def vector_db_id(self) -> str:
return self.identifier
@property
def provider_vector_db_id(self) -> str | None:
return self.provider_resource_id
class VectorDBInput(BaseModel):
"""Input parameters for creating or configuring a vector database.
:param vector_db_id: Unique identifier for the vector database
:param embedding_model: Name of the embedding model to use for vector generation
:param embedding_dimension: Dimension of the embedding vectors
:param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database
"""
vector_db_id: str
embedding_model: str
embedding_dimension: int
provider_id: str | None = None
provider_vector_db_id: str | None = None
class ListVectorDBsResponse(BaseModel):
"""Response from listing vector databases.
:param data: List of vector databases
"""
data: list[VectorDB]
@runtime_checkable
class VectorDBs(Protocol):
"""Internal protocol for vector_dbs routing - no public API endpoints."""
async def list_vector_dbs(self) -> ListVectorDBsResponse:
"""Internal method to list vector databases."""
...
async def get_vector_db(
self,
vector_db_id: str,
) -> VectorDB:
"""Internal method to get a vector database by ID."""
...
async def register_vector_db(
self,
vector_db_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
vector_db_name: str | None = None,
provider_vector_db_id: str | None = None,
) -> VectorDB:
"""Internal method to register a vector database."""
...
async def unregister_vector_db(self, vector_db_id: str) -> None:
"""Internal method to unregister a vector database."""
...

View file

@ -15,7 +15,7 @@ from fastapi import Body
from pydantic import BaseModel, Field
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
@ -140,6 +140,7 @@ class VectorStoreFileCounts(BaseModel):
total: int
# TODO: rename this as OpenAIVectorStore
@json_schema_type
class VectorStoreObject(BaseModel):
"""OpenAI Vector Store object.
@ -517,17 +518,18 @@ class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="all
chunking_strategy: VectorStoreChunkingStrategy | None = None
class VectorDBStore(Protocol):
def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
class VectorStoreTable(Protocol):
def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ...
@runtime_checkable
@trace_protocol
class VectorIO(Protocol):
vector_db_store: VectorDBStore | None = None
vector_store_table: VectorStoreTable | None = None
# this will just block now until chunks are inserted, but it should
# probably return a Job instance which can be polled for completion
# TODO: rename vector_db_id to vector_store_id once Stainless is working
@webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
async def insert_chunks(
self,
@ -546,6 +548,7 @@ class VectorIO(Protocol):
"""
...
# TODO: rename vector_db_id to vector_store_id once Stainless is working
@webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
async def query_chunks(
self,

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .vector_dbs import *
from .vector_stores import *

View file

@ -0,0 +1,51 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Literal
from pydantic import BaseModel
from llama_stack.apis.resource import Resource, ResourceType
# Internal resource type for storing the vector store routing and other information
class VectorStore(Resource):
"""Vector database resource for storing and querying vector embeddings.
:param type: Type of resource, always 'vector_store' for vector stores
:param embedding_model: Name of the embedding model to use for vector generation
:param embedding_dimension: Dimension of the embedding vectors
"""
type: Literal[ResourceType.vector_store] = ResourceType.vector_store
embedding_model: str
embedding_dimension: int
vector_store_name: str | None = None
@property
def vector_store_id(self) -> str:
return self.identifier
@property
def provider_vector_store_id(self) -> str | None:
return self.provider_resource_id
class VectorStoreInput(BaseModel):
"""Input parameters for creating or configuring a vector database.
:param vector_store_id: Unique identifier for the vector store
:param embedding_model: Name of the embedding model to use for vector generation
:param embedding_dimension: Dimension of the embedding vectors
:param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
"""
vector_store_id: str
embedding_model: str
embedding_dimension: int
provider_id: str | None = None
provider_vector_store_id: str | None = None
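
For orientation, a hedged sketch of constructing the new internal resource; `identifier`, `provider_id`, and `provider_resource_id` are assumed to be supplied by the `Resource` base class, and the example values mirror the faiss demo shown earlier in this changeset:

```python
vs = VectorStore(
    identifier="my_demo_vector_db",            # assumed Resource field
    provider_id="faiss",                       # assumed Resource field
    provider_resource_id="my_demo_vector_db",  # assumed Resource field
    embedding_model="nomic-embed-text-v1.5",
    embedding_dimension=768,
    vector_store_name="my_demo_vector_db",
)

# The convenience properties simply alias the underlying Resource fields.
assert vs.vector_store_id == "my_demo_vector_db"
assert vs.provider_vector_store_id == "my_demo_vector_db"
```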

View file

@ -6,6 +6,8 @@
import argparse
from llama_stack.log import setup_logging
from .stack import StackParser
from .stack.utils import print_subcommand_description
@ -42,6 +44,9 @@ class LlamaCLIParser:
def main():
# Initialize logging from environment variables before any other operations
setup_logging()
parser = LlamaCLIParser()
args = parser.parse_args()
parser.run(args)
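`main()` now calls `setup_logging()` before the parser is even constructed. Assuming the `LLAMA_STACK_LOGGING` environment convention used by `llama_stack.log` (an assumption; the variable name is not shown in this hunk), the ordering matters because the settings are read once at startup:

```python
import os

# Hypothetical illustration: the variable name and "category=level" format are assumed.
os.environ["LLAMA_STACK_LOGGING"] = "cli=debug;core=warning"

from llama_stack.log import setup_logging

setup_logging()  # reads the environment once; later get_logger() calls inherit these levels
```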


@ -1,519 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import importlib.resources
import json
import os
import shutil
import sys
import textwrap
from functools import lru_cache
from importlib.abc import Traversable
from pathlib import Path
import yaml
from prompt_toolkit import prompt
from prompt_toolkit.completion import WordCompleter
from prompt_toolkit.validation import Validator
from termcolor import colored, cprint
from llama_stack.cli.stack.utils import ImageType
from llama_stack.cli.table import print_table
from llama_stack.core.build import (
SERVER_DEPENDENCIES,
build_image,
get_provider_dependencies,
)
from llama_stack.core.configure import parse_and_maybe_upgrade_config
from llama_stack.core.datatypes import (
BuildConfig,
BuildProvider,
DistributionSpec,
Provider,
StackRunConfig,
)
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.external import load_external_apis
from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.stack import replace_env_vars
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageConfig,
)
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.exec import formulate_run_args, run_command
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api
DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"
@lru_cache
def available_distros_specs() -> dict[str, BuildConfig]:
distro_specs = {}
for p in DISTRIBS_PATH.rglob("*build.yaml"):
distro_name = p.parent.name
with open(p) as f:
build_config = BuildConfig(**yaml.safe_load(f))
distro_specs[distro_name] = build_config
return distro_specs
def run_stack_build_command(args: argparse.Namespace) -> None:
if args.list_distros:
return _run_distro_list_cmd()
if args.image_type == ImageType.VENV.value:
current_venv = os.environ.get("VIRTUAL_ENV")
image_name = args.image_name or current_venv
else:
image_name = args.image_name
if args.template:
cprint(
"The --template argument is deprecated. Please use --distro instead.",
color="red",
file=sys.stderr,
)
distro_name = args.template
else:
distro_name = args.distribution
if distro_name:
available_distros = available_distros_specs()
if distro_name not in available_distros:
cprint(
f"Could not find distribution {distro_name}. Please run `llama stack build --list-distros` to check out the available distributions",
color="red",
file=sys.stderr,
)
sys.exit(1)
build_config = available_distros[distro_name]
if args.image_type:
build_config.image_type = args.image_type
else:
cprint(
f"Please specify a image-type ({' | '.join(e.value for e in ImageType)}) for {distro_name}",
color="red",
file=sys.stderr,
)
sys.exit(1)
elif args.providers:
provider_list: dict[str, list[BuildProvider]] = dict()
for api_provider in args.providers.split(","):
if "=" not in api_provider:
cprint(
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
color="red",
file=sys.stderr,
)
sys.exit(1)
api, provider_type = api_provider.split("=")
providers_for_api = get_provider_registry().get(Api(api), None)
if providers_for_api is None:
cprint(
f"{api} is not a valid API.",
color="red",
file=sys.stderr,
)
sys.exit(1)
if provider_type in providers_for_api:
provider = BuildProvider(
provider_type=provider_type,
module=None,
)
provider_list.setdefault(api, []).append(provider)
else:
cprint(
f"{provider} is not a valid provider for the {api} API.",
color="red",
file=sys.stderr,
)
sys.exit(1)
distribution_spec = DistributionSpec(
providers=provider_list,
description=",".join(args.providers),
)
if not args.image_type:
cprint(
f"Please specify a image-type (container | venv) for {args.template}",
color="red",
file=sys.stderr,
)
sys.exit(1)
build_config = BuildConfig(image_type=args.image_type, distribution_spec=distribution_spec)
elif not args.config and not distro_name:
name = prompt(
"> Enter a name for your Llama Stack (e.g. my-local-stack): ",
validator=Validator.from_callable(
lambda x: len(x) > 0,
error_message="Name cannot be empty, please enter a name",
),
)
image_type = prompt(
"> Enter the image type you want your Llama Stack to be built as (use <TAB> to see options): ",
completer=WordCompleter([e.value for e in ImageType]),
complete_while_typing=True,
validator=Validator.from_callable(
lambda x: x in [e.value for e in ImageType],
error_message="Invalid image type. Use <TAB> to see options",
),
)
image_name = f"llamastack-{name}"
cprint(
textwrap.dedent(
"""
Llama Stack is composed of several APIs working together. Let's select
the provider types (implementations) you want to use for these APIs.
""",
),
color="green",
file=sys.stderr,
)
cprint("Tip: use <TAB> to see options for the providers.\n", color="green", file=sys.stderr)
providers: dict[str, list[BuildProvider]] = dict()
for api, providers_for_api in get_provider_registry().items():
available_providers = [x for x in providers_for_api.keys() if x not in ("remote", "remote::sample")]
if not available_providers:
continue
api_provider = prompt(
f"> Enter provider for API {api.value}: ",
completer=WordCompleter(available_providers),
complete_while_typing=True,
validator=Validator.from_callable(
lambda x: x in available_providers, # noqa: B023 - see https://github.com/astral-sh/ruff/issues/7847
error_message="Invalid provider, use <TAB> to see options",
),
)
string_providers = api_provider.split(" ")
for provider in string_providers:
providers.setdefault(api.value, []).append(BuildProvider(provider_type=provider))
description = prompt(
"\n > (Optional) Enter a short description for your Llama Stack: ",
default="",
)
distribution_spec = DistributionSpec(
providers=providers,
description=description,
)
build_config = BuildConfig(image_type=image_type, distribution_spec=distribution_spec)
else:
with open(args.config) as f:
try:
contents = yaml.safe_load(f)
contents = replace_env_vars(contents)
build_config = BuildConfig(**contents)
if args.image_type:
build_config.image_type = args.image_type
except Exception as e:
cprint(
f"Could not parse config file {args.config}: {e}",
color="red",
file=sys.stderr,
)
sys.exit(1)
if args.print_deps_only:
print(f"# Dependencies for {distro_name or args.config or image_name}")
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
normal_deps += SERVER_DEPENDENCIES
print(f"uv pip install {' '.join(normal_deps)}")
for special_dep in special_deps:
print(f"uv pip install {special_dep}")
for external_dep in external_provider_dependencies:
print(f"uv pip install {external_dep}")
return
try:
run_config = _run_stack_build_command_from_build_config(
build_config,
image_name=image_name,
config_path=args.config,
distro_name=distro_name,
)
except (Exception, RuntimeError) as exc:
import traceback
cprint(
f"Error building stack: {exc}",
color="red",
file=sys.stderr,
)
cprint("Stack trace:", color="red", file=sys.stderr)
traceback.print_exc()
sys.exit(1)
if run_config is None:
cprint(
"Run config path is empty",
color="red",
file=sys.stderr,
)
sys.exit(1)
if args.run:
config_dict = yaml.safe_load(run_config.read_text())
config = parse_and_maybe_upgrade_config(config_dict)
if config.external_providers_dir and not config.external_providers_dir.exists():
config.external_providers_dir.mkdir(exist_ok=True)
run_args = formulate_run_args(args.image_type, image_name or config.image_name)
run_args.extend([str(os.getenv("LLAMA_STACK_PORT", 8321)), "--config", str(run_config)])
run_command(run_args)
def _generate_run_config(
build_config: BuildConfig,
build_dir: Path,
image_name: str,
) -> Path:
"""
Generate a run.yaml template file for user to edit from a build.yaml file
"""
apis = list(build_config.distribution_spec.providers.keys())
distro_dir = DISTRIBS_BASE_DIR / image_name
storage = StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
),
"sql_default": SqliteSqlStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db",
),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(
backend="kv_default",
namespace="registry",
),
inference=InferenceStoreReference(
backend="sql_default",
table_name="inference_store",
),
conversations=SqlStoreReference(
backend="sql_default",
table_name="openai_conversations",
),
),
)
run_config = StackRunConfig(
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
image_name=image_name,
apis=apis,
providers={},
storage=storage,
external_providers_dir=build_config.external_providers_dir
if build_config.external_providers_dir
else EXTERNAL_PROVIDERS_DIR,
)
# build providers dict
provider_registry = get_provider_registry(build_config)
for api in apis:
run_config.providers[api] = []
providers = build_config.distribution_spec.providers[api]
for provider in providers:
pid = provider.provider_type.split("::")[-1]
p = provider_registry[Api(api)][provider.provider_type]
if p.deprecation_error:
raise InvalidProviderError(p.deprecation_error)
try:
config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
except (ModuleNotFoundError, ValueError) as exc:
# HACK ALERT:
# This code executes after the build is done, so the import cannot work: the
# package is only available inside the venv or container, not on the host.
# TODO: use an "is_external" flag in ProviderSpec to check if the provider is
# external
cprint(
f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
color="yellow",
file=sys.stderr,
)
# Set config_type to None to avoid UnboundLocalError
config_type = None
if config_type is not None and hasattr(config_type, "sample_run_config"):
config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
else:
config = {}
p_spec = Provider(
provider_id=pid,
provider_type=provider.provider_type,
config=config,
module=provider.module,
)
run_config.providers[api].append(p_spec)
run_config_file = build_dir / f"{image_name}-run.yaml"
with open(run_config_file, "w") as f:
to_write = json.loads(run_config.model_dump_json())
f.write(yaml.dump(to_write, sort_keys=False))
# Only print this message for non-container builds: for containers it would be shown
# before the image is actually built, whereas for non-container builds the run.yaml is
# generated at the very end of the build process, so the message lands at the right time.
if build_config.image_type != LlamaStackImageType.CONTAINER.value:
cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
return run_config_file
def _run_stack_build_command_from_build_config(
build_config: BuildConfig,
image_name: str | None = None,
distro_name: str | None = None,
config_path: str | None = None,
) -> Path | Traversable:
image_name = image_name or build_config.image_name
if build_config.image_type == LlamaStackImageType.CONTAINER.value:
if distro_name:
image_name = f"distribution-{distro_name}"
else:
if not image_name:
raise ValueError("Please specify an image name when building a container image without a template")
else:
if not image_name and os.environ.get("UV_SYSTEM_PYTHON"):
image_name = "__system__"
if not image_name:
raise ValueError("Please specify an image name when building a venv image")
# At this point, image_name should be guaranteed to be a string
if image_name is None:
raise ValueError("image_name should not be None after validation")
if distro_name:
build_dir = DISTRIBS_BASE_DIR / distro_name
build_file_path = build_dir / f"{distro_name}-build.yaml"
else:
if image_name is None:
raise ValueError("image_name cannot be None")
build_dir = DISTRIBS_BASE_DIR / image_name
build_file_path = build_dir / f"{image_name}-build.yaml"
os.makedirs(build_dir, exist_ok=True)
run_config_file = None
# Generate the run.yaml so it can be included in the container image with the proper entrypoint
# Only do this if we're building a container image and we're not using a template
if build_config.image_type == LlamaStackImageType.CONTAINER.value and not distro_name and config_path:
cprint("Generating run.yaml file", color="yellow", file=sys.stderr)
run_config_file = _generate_run_config(build_config, build_dir, image_name)
with open(build_file_path, "w") as f:
to_write = json.loads(build_config.model_dump_json(exclude_none=True))
f.write(yaml.dump(to_write, sort_keys=False))
# We first install the external APIs so that the build process can use them and discover the
# providers dependencies
if build_config.external_apis_dir:
cprint("Installing external APIs", color="yellow", file=sys.stderr)
external_apis = load_external_apis(build_config)
if external_apis:
# install the external APIs
packages = []
for _, api_spec in external_apis.items():
if api_spec.pip_packages:
packages.extend(api_spec.pip_packages)
cprint(
f"Installing {api_spec.name} with pip packages {api_spec.pip_packages}",
color="yellow",
file=sys.stderr,
)
return_code = run_command(["uv", "pip", "install", *packages])
if return_code != 0:
packages_str = ", ".join(packages)
raise RuntimeError(
f"Failed to install external APIs packages: {packages_str} (return code: {return_code})"
)
return_code = build_image(
build_config,
image_name,
distro_or_config=distro_name or config_path or str(build_file_path),
run_config=run_config_file.as_posix() if run_config_file else None,
)
if return_code != 0:
raise RuntimeError(f"Failed to build image {image_name}")
if distro_name:
# copy run.yaml from distribution to build_dir instead of generating it again
distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro_name}/run.yaml"
run_config_file = build_dir / f"{distro_name}-run.yaml"
with importlib.resources.as_file(distro_path) as path:
shutil.copy(path, run_config_file)
cprint("Build Successful!", color="green", file=sys.stderr)
cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr)
if build_config.image_type == LlamaStackImageType.VENV:
cprint(
"You can run the new Llama Stack distro (after activating "
+ colored(image_name, "cyan")
+ ") via: "
+ colored(f"llama stack run {run_config_file}", "blue"),
color="green",
file=sys.stderr,
)
elif build_config.image_type == LlamaStackImageType.CONTAINER:
cprint(
"You can run the container with: "
+ colored(
f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue"
),
color="green",
file=sys.stderr,
)
return distro_path
else:
return _generate_run_config(build_config, build_dir, image_name)
def _run_distro_list_cmd() -> None:
headers = [
"Distribution Name",
# "Providers",
"Description",
]
rows = []
for distro_name, spec in available_distros_specs().items():
rows.append(
[
distro_name,
# json.dumps(spec.distribution_spec.providers, indent=2),
spec.distribution_spec.description,
]
)
print_table(
rows,
headers,
separate_rows=True,
)
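The `--providers` branch above validates a compact `api=provider` list before consulting the provider registry. A standalone sketch of just the parsing step, using plain dicts instead of the registry so it runs without llama-stack installed (the provider-type strings are illustrative):

```python
def parse_providers(spec: str) -> dict[str, list[str]]:
    """Parse 'api1=provider1,api2=provider2' into {api: [provider_types]}."""
    out: dict[str, list[str]] = {}
    for pair in spec.split(","):
        if "=" not in pair:
            raise ValueError("expected api=provider pairs separated by commas")
        api, provider_type = pair.split("=", 1)
        out.setdefault(api, []).append(provider_type)
    return out


assert parse_providers("inference=remote::ollama,vector_io=inline::faiss") == {
    "inference": ["remote::ollama"],
    "vector_io": ["inline::faiss"],
}
```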


@ -1,106 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import textwrap
from llama_stack.cli.stack.utils import ImageType
from llama_stack.cli.subcommand import Subcommand
from llama_stack.log import get_logger
logger = get_logger(__name__, category="cli")
class StackBuild(Subcommand):
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"build",
prog="llama stack build",
description="[DEPRECATED] Build a Llama stack container. This command is deprecated and will be removed in a future release. Use `llama stack list-deps <distro>' instead.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_stack_build_command)
def _add_arguments(self):
self.parser.add_argument(
"--config",
type=str,
default=None,
help="Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
)
self.parser.add_argument(
"--template",
type=str,
default=None,
help="""(deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions""",
)
self.parser.add_argument(
"--distro",
"--distribution",
dest="distribution",
type=str,
default=None,
help="""Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions""",
)
self.parser.add_argument(
"--list-distros",
"--list-distributions",
action="store_true",
dest="list_distros",
default=False,
help="Show the available distributions for building a Llama Stack distribution",
)
self.parser.add_argument(
"--image-type",
type=str,
help="Image Type to use for the build. If not specified, will use the image type from the template config.",
choices=[e.value for e in ImageType],
default=None, # no default so we can detect if a user specified --image-type and override image_type in the config
)
self.parser.add_argument(
"--image-name",
type=str,
help=textwrap.dedent(
f"""[for image-type={"|".join(e.value for e in ImageType)}] Name of the virtual environment to use for
the build. If not specified, currently active environment will be used if found.
"""
),
default=None,
)
self.parser.add_argument(
"--print-deps-only",
default=False,
action="store_true",
help="Print the dependencies for the stack only, without building the stack",
)
self.parser.add_argument(
"--run",
action="store_true",
default=False,
help="Run the stack after building using the same image type, name, and other applicable arguments",
)
self.parser.add_argument(
"--providers",
type=str,
default=None,
help="Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.",
)
def _run_stack_build_command(self, args: argparse.Namespace) -> None:
logger.warning(
"The 'llama stack build' command is deprecated and will be removed in a future release. Please use 'llama stack list-deps'"
)
# always keep implementation completely silo-ed away from CLI so CLI
# can be fast to load and reduces dependencies
from ._build import run_stack_build_command
return run_stack_build_command(args)


@ -11,7 +11,6 @@ from llama_stack.cli.stack.list_stacks import StackListBuilds
from llama_stack.cli.stack.utils import print_subcommand_description
from llama_stack.cli.subcommand import Subcommand
from .build import StackBuild
from .list_apis import StackListApis
from .list_deps import StackListDeps
from .list_providers import StackListProviders
@ -41,7 +40,6 @@ class StackParser(Subcommand):
# Add sub-commands
StackListDeps.create(subparsers)
StackBuild.create(subparsers)
StackListApis.create(subparsers)
StackListProviders.create(subparsers)
StackRun.create(subparsers)


@ -41,7 +41,7 @@ class AccessRule(BaseModel):
A rule defines a list of action either to permit or to forbid. It may specify a
principal or a resource that must match for the rule to take effect. The resource
to match should be specified in the form of a type qualified identifier, e.g.
model::my-model or vector_db::some-db, or a wildcard for all resources of a type,
model::my-model or vector_store::some-db, or a wildcard for all resources of a type,
e.g. model::*. If the principal or resource are not specified, they will match all
requests.
@ -79,9 +79,9 @@ class AccessRule(BaseModel):
description: any user has read access to any resource created by a member of their team
- forbid:
actions: [create, read, delete]
resource: vector_db::*
resource: vector_store::*
unless: user with admin in roles
description: only user with admin role can use vector_db resources
description: only user with admin role can use vector_store resources
"""


@ -1,410 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
PYPI_VERSION=${PYPI_VERSION:-}
BUILD_PLATFORM=${BUILD_PLATFORM:-}
# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
# mounting is not supported by docker buildx, so we use COPY instead
USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-}
# Path to the run.yaml file in the container
RUN_CONFIG_PATH=/app/run.yaml
BUILD_CONTEXT_DIR=$(pwd)
set -euo pipefail
# Define color codes
RED='\033[0;31m'
NC='\033[0m' # No Color
# Usage function
usage() {
echo "Usage: $0 --image-name <image_name> --container-base <container_base> --normal-deps <pip_dependencies> [--run-config <run_config>] [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
echo "Example: $0 --image-name llama-stack-img --container-base python:3.12-slim --normal-deps 'numpy pandas' --run-config ./run.yaml --external-provider-deps 'foo' --optional-deps 'bar'"
exit 1
}
# Parse arguments
image_name=""
container_base=""
normal_deps=""
external_provider_deps=""
optional_deps=""
run_config=""
distro_or_config=""
while [[ $# -gt 0 ]]; do
key="$1"
case "$key" in
--image-name)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --image-name requires a string value" >&2
usage
fi
image_name="$2"
shift 2
;;
--container-base)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --container-base requires a string value" >&2
usage
fi
container_base="$2"
shift 2
;;
--normal-deps)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --normal-deps requires a string value" >&2
usage
fi
normal_deps="$2"
shift 2
;;
--external-provider-deps)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --external-provider-deps requires a string value" >&2
usage
fi
external_provider_deps="$2"
shift 2
;;
--optional-deps)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --optional-deps requires a string value" >&2
usage
fi
optional_deps="$2"
shift 2
;;
--run-config)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --run-config requires a string value" >&2
usage
fi
run_config="$2"
shift 2
;;
--distro-or-config)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --distro-or-config requires a string value" >&2
usage
fi
distro_or_config="$2"
shift 2
;;
*)
echo "Unknown option: $1" >&2
usage
;;
esac
done
# Check required arguments
if [[ -z "$image_name" || -z "$container_base" || -z "$normal_deps" ]]; then
echo "Error: --image-name, --container-base, and --normal-deps are required." >&2
usage
fi
CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
CONTAINER_OPTS=${CONTAINER_OPTS:---progress=plain}
TEMP_DIR=$(mktemp -d)
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
source "$SCRIPT_DIR/common.sh"
add_to_container() {
output_file="$TEMP_DIR/Containerfile"
if [ -t 0 ]; then
printf '%s\n' "$1" >>"$output_file"
else
cat >>"$output_file"
fi
}
if ! is_command_available "$CONTAINER_BINARY"; then
printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
exit 1
fi
if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
add_to_container << EOF
FROM $container_base
WORKDIR /app
# We install the Python 3.12 dev headers and build tools so that any
# C-extension wheels (e.g. polyleven, faiss-cpu) can compile successfully.
RUN dnf -y update && dnf install -y iputils git net-tools wget \
vim-minimal python3.12 python3.12-pip python3.12-wheel \
python3.12-setuptools python3.12-devel gcc gcc-c++ make && \
ln -s /bin/pip3.12 /bin/pip && ln -s /bin/python3.12 /bin/python && dnf clean all
ENV UV_SYSTEM_PYTHON=1
RUN pip install uv
EOF
else
add_to_container << EOF
FROM $container_base
WORKDIR /app
RUN apt-get update && apt-get install -y \
iputils-ping net-tools iproute2 dnsutils telnet \
curl wget git \
procps psmisc lsof \
traceroute \
bubblewrap \
gcc g++ \
&& rm -rf /var/lib/apt/lists/*
ENV UV_SYSTEM_PYTHON=1
RUN pip install uv
EOF
fi
# Add pip dependencies first since llama-stack is what will change most often
# so we can reuse layers.
if [ -n "$normal_deps" ]; then
read -ra pip_args <<< "$normal_deps"
quoted_deps=$(printf " %q" "${pip_args[@]}")
add_to_container << EOF
RUN uv pip install --no-cache $quoted_deps
EOF
fi
if [ -n "$optional_deps" ]; then
IFS='#' read -ra parts <<<"$optional_deps"
for part in "${parts[@]}"; do
read -ra pip_args <<< "$part"
quoted_deps=$(printf " %q" "${pip_args[@]}")
add_to_container <<EOF
RUN uv pip install --no-cache $quoted_deps
EOF
done
fi
if [ -n "$external_provider_deps" ]; then
IFS='#' read -ra parts <<<"$external_provider_deps"
for part in "${parts[@]}"; do
read -ra pip_args <<< "$part"
quoted_deps=$(printf " %q" "${pip_args[@]}")
add_to_container <<EOF
RUN uv pip install --no-cache $quoted_deps
EOF
add_to_container <<EOF
RUN python3 - <<PYTHON | uv pip install --no-cache -r -
import importlib
import sys
try:
package_name = '$part'.split('==')[0].split('>=')[0].split('<=')[0].split('!=')[0].split('<')[0].split('>')[0]
module = importlib.import_module(f'{package_name}.provider')
spec = module.get_provider_spec()
if hasattr(spec, 'pip_packages') and spec.pip_packages:
if isinstance(spec.pip_packages, (list, tuple)):
print('\n'.join(spec.pip_packages))
except Exception as e:
print(f'Error getting provider spec for {package_name}: {e}', file=sys.stderr)
PYTHON
EOF
done
fi
get_python_cmd() {
if is_command_available python; then
echo "python"
elif is_command_available python3; then
echo "python3"
else
echo "Error: Neither python nor python3 is installed. Please install Python to continue." >&2
exit 1
fi
}
if [ -n "$run_config" ]; then
# Copy the run config to the build context since it's an absolute path
cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml"
# Parse the run.yaml configuration to identify external provider directories
# If external providers are specified, copy their directory to the container
# and update the configuration to reference the new container path
python_cmd=$(get_python_cmd)
external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')")
external_providers_dir=$(eval echo "$external_providers_dir")
if [ -n "$external_providers_dir" ]; then
if [ -d "$external_providers_dir" ]; then
echo "Copying external providers directory: $external_providers_dir"
cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d"
add_to_container << EOF
COPY providers.d /.llama/providers.d
EOF
fi
# Edit the run.yaml file to change the external_providers_dir to /.llama/providers.d
if [ "$(uname)" = "Darwin" ]; then
sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
rm -f "$BUILD_CONTEXT_DIR/run.yaml.bak"
else
sed -i 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
fi
fi
# Copy run config into docker image
add_to_container << EOF
COPY run.yaml $RUN_CONFIG_PATH
EOF
fi
stack_mount="/app/llama-stack-source"
client_mount="/app/llama-stack-client-source"
install_local_package() {
local dir="$1"
local mount_point="$2"
local name="$3"
if [ ! -d "$dir" ]; then
echo "${RED}Warning: $name is set but directory does not exist: $dir${NC}" >&2
exit 1
fi
if [ "$USE_COPY_NOT_MOUNT" = "true" ]; then
add_to_container << EOF
COPY $dir $mount_point
EOF
fi
add_to_container << EOF
RUN uv pip install --no-cache -e $mount_point
EOF
}
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
install_local_package "$LLAMA_STACK_CLIENT_DIR" "$client_mount" "LLAMA_STACK_CLIENT_DIR"
fi
if [ -n "$LLAMA_STACK_DIR" ]; then
install_local_package "$LLAMA_STACK_DIR" "$stack_mount" "LLAMA_STACK_DIR"
else
if [ -n "$TEST_PYPI_VERSION" ]; then
# these packages are damaged in test-pypi, so install them first
add_to_container << EOF
RUN uv pip install --no-cache fastapi libcst
EOF
add_to_container << EOF
RUN uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \
--index-strategy unsafe-best-match \
llama-stack==$TEST_PYPI_VERSION
EOF
else
if [ -n "$PYPI_VERSION" ]; then
SPEC_VERSION="llama-stack==${PYPI_VERSION}"
else
SPEC_VERSION="llama-stack"
fi
add_to_container << EOF
RUN uv pip install --no-cache $SPEC_VERSION
EOF
fi
fi
# remove uv after installation
add_to_container << EOF
RUN pip uninstall -y uv
EOF
# If a run config is provided, we use the llama stack CLI
if [[ -n "$run_config" ]]; then
add_to_container << EOF
ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"]
EOF
elif [[ "$distro_or_config" != *.yaml ]]; then
add_to_container << EOF
ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"]
EOF
fi
# Add other required commands generic to all containers
add_to_container << EOF
RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true)
EOF
printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
cat "$TEMP_DIR"/Containerfile
printf "\n"
# Start building the CLI arguments
CLI_ARGS=()
# Read CONTAINER_OPTS and put it in an array
read -ra CLI_ARGS <<< "$CONTAINER_OPTS"
if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then
if [ -n "$LLAMA_STACK_DIR" ]; then
CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount")
fi
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount")
fi
fi
if is_command_available selinuxenabled && selinuxenabled; then
# Disable SELinux labels -- we don't want to relabel the llama-stack source dir
CLI_ARGS+=("--security-opt" "label=disable")
fi
# Set version tag based on PyPI version
if [ -n "$PYPI_VERSION" ]; then
version_tag="$PYPI_VERSION"
elif [ -n "$TEST_PYPI_VERSION" ]; then
version_tag="test-$TEST_PYPI_VERSION"
elif [[ -n "$LLAMA_STACK_DIR" || -n "$LLAMA_STACK_CLIENT_DIR" ]]; then
version_tag="dev"
else
URL="https://pypi.org/pypi/llama-stack/json"
version_tag=$(curl -s $URL | jq -r '.info.version')
fi
# Add version tag to image name
image_tag="$image_name:$version_tag"
# Detect platform architecture
ARCH=$(uname -m)
if [ -n "$BUILD_PLATFORM" ]; then
CLI_ARGS+=("--platform" "$BUILD_PLATFORM")
elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
CLI_ARGS+=("--platform" "linux/arm64")
elif [ "$ARCH" = "x86_64" ]; then
CLI_ARGS+=("--platform" "linux/amd64")
else
echo "Unsupported architecture: $ARCH"
exit 1
fi
echo "PWD: $(pwd)"
echo "Containerfile: $TEMP_DIR/Containerfile"
set -x
$CONTAINER_BINARY build \
"${CLI_ARGS[@]}" \
-t "$image_tag" \
-f "$TEMP_DIR/Containerfile" \
"$BUILD_CONTEXT_DIR"
# clean up tmp/configs
rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR"
set +x
echo "Success!"


@ -1,220 +0,0 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
UV_SYSTEM_PYTHON=${UV_SYSTEM_PYTHON:-}
VIRTUAL_ENV=${VIRTUAL_ENV:-}
set -euo pipefail
# Define color codes
RED='\033[0;31m'
NC='\033[0m' # No Color
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
source "$SCRIPT_DIR/common.sh"
# Usage function
usage() {
echo "Usage: $0 --env-name <env_name> --normal-deps <pip_dependencies> [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
echo "Example: $0 --env-name mybuild --normal-deps 'numpy pandas scipy' --external-provider-deps 'foo' --optional-deps 'bar'"
exit 1
}
# Parse arguments
env_name=""
normal_deps=""
external_provider_deps=""
optional_deps=""
while [[ $# -gt 0 ]]; do
key="$1"
case "$key" in
--env-name)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --env-name requires a string value" >&2
usage
fi
env_name="$2"
shift 2
;;
--normal-deps)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --normal-deps requires a string value" >&2
usage
fi
normal_deps="$2"
shift 2
;;
--external-provider-deps)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --external-provider-deps requires a string value" >&2
usage
fi
external_provider_deps="$2"
shift 2
;;
--optional-deps)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --optional-deps requires a string value" >&2
usage
fi
optional_deps="$2"
shift 2
;;
*)
echo "Unknown option: $1" >&2
usage
;;
esac
done
# Check required arguments
if [[ -z "$env_name" || -z "$normal_deps" ]]; then
echo "Error: --env-name and --normal-deps are required." >&2
usage
fi
if [ -n "$LLAMA_STACK_DIR" ]; then
echo "Using llama-stack-dir=$LLAMA_STACK_DIR"
fi
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
echo "Using llama-stack-client-dir=$LLAMA_STACK_CLIENT_DIR"
fi
ENVNAME=""
# pre-run checks to make sure we can proceed with the installation
pre_run_checks() {
local env_name="$1"
if ! is_command_available uv; then
echo "uv is not installed, trying to install it."
if ! is_command_available pip; then
echo "pip is not installed, cannot automatically install 'uv'."
echo "Follow this link to install it:"
echo "https://docs.astral.sh/uv/getting-started/installation/"
exit 1
else
pip install uv
fi
fi
# checking if an environment with the same name already exists
if [ -d "$env_name" ]; then
echo "Environment '$env_name' already exists, re-using it."
fi
}
run() {
# Use only global variables set by flag parser
if [ -n "$UV_SYSTEM_PYTHON" ] || [ "$env_name" == "__system__" ]; then
echo "Installing dependencies in system Python environment"
export UV_SYSTEM_PYTHON=1
elif [ "$VIRTUAL_ENV" == "$env_name" ]; then
echo "Virtual environment $env_name is already active"
else
echo "Using virtual environment $env_name"
uv venv "$env_name"
source "$env_name/bin/activate"
fi
if [ -n "$TEST_PYPI_VERSION" ]; then
uv pip install fastapi libcst
uv pip install --extra-index-url https://test.pypi.org/simple/ \
--index-strategy unsafe-best-match \
llama-stack=="$TEST_PYPI_VERSION" \
$normal_deps
if [ -n "$optional_deps" ]; then
IFS='#' read -ra parts <<<"$optional_deps"
for part in "${parts[@]}"; do
echo "$part"
uv pip install $part
done
fi
if [ -n "$external_provider_deps" ]; then
IFS='#' read -ra parts <<<"$external_provider_deps"
for part in "${parts[@]}"; do
echo "$part"
uv pip install "$part"
done
fi
else
if [ -n "$LLAMA_STACK_DIR" ]; then
# fail if the directory is missing, unless LLAMA_STACK_DIR is a git+ reference
if [ ! -d "$LLAMA_STACK_DIR" ] && [[ "$LLAMA_STACK_DIR" != git+* ]]; then
printf "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_DIR" >&2
exit 1
fi
printf "Installing from LLAMA_STACK_DIR: %s\n" "$LLAMA_STACK_DIR"
# editable only if LLAMA_STACK_DIR does not start with "git+"
if [[ "$LLAMA_STACK_DIR" != git+* ]]; then
EDITABLE="-e"
else
EDITABLE=""
fi
uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_DIR"
else
uv pip install --no-cache-dir llama-stack
fi
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
# fail if the directory is missing, unless LLAMA_STACK_CLIENT_DIR is a git+ reference
if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ] && [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then
printf "${RED}Warning: LLAMA_STACK_CLIENT_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_CLIENT_DIR" >&2
exit 1
fi
printf "Installing from LLAMA_STACK_CLIENT_DIR: %s\n" "$LLAMA_STACK_CLIENT_DIR"
# editable only if LLAMA_STACK_CLIENT_DIR does not start with "git+"
if [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then
EDITABLE="-e"
else
EDITABLE=""
fi
uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_CLIENT_DIR"
fi
printf "Installing pip dependencies\n"
uv pip install $normal_deps
if [ -n "$optional_deps" ]; then
IFS='#' read -ra parts <<<"$optional_deps"
for part in "${parts[@]}"; do
echo "Installing special provider module: $part"
uv pip install $part
done
fi
if [ -n "$external_provider_deps" ]; then
IFS='#' read -ra parts <<<"$external_provider_deps"
for part in "${parts[@]}"; do
echo "Installing external provider module: $part"
uv pip install "$part"
echo "Getting provider spec for module: $part and installing dependencies"
package_name=$(echo "$part" | sed 's/[<>=!].*//')
python3 -c "
import importlib
import sys
try:
module = importlib.import_module(f'$package_name.provider')
spec = module.get_provider_spec()
if hasattr(spec, 'pip_packages') and spec.pip_packages:
print('\\n'.join(spec.pip_packages))
except Exception as e:
print(f'Error getting provider spec for $package_name: {e}', file=sys.stderr)
" | uv pip install -r -
done
fi
fi
}
pre_run_checks "$env_name"
run
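Both build scripts discover extra dependencies by importing `<package>.provider` and calling `get_provider_spec()`, then reading `pip_packages` off the result. An external provider package can satisfy that contract with something as small as the following sketch (the spec shape is inferred from how the scripts consume it; a real provider would return the stack's ProviderSpec type):

```python
# my_provider/provider.py -- hypothetical external provider package
from types import SimpleNamespace


def get_provider_spec():
    # The scripts above only require a .pip_packages list or tuple on the result.
    return SimpleNamespace(pip_packages=["chromadb>=0.5", "numpy"])
```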


@ -23,8 +23,8 @@ from llama_stack.apis.scoring import Scoring
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
from llama_stack.apis.shields import Shield, ShieldInput
from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
from llama_stack.apis.vector_io import VectorIO
from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.core.storage.datatypes import (
KVStoreReference,
@ -71,7 +71,7 @@ class ShieldWithOwner(Shield, ResourceWithOwner):
pass
class VectorDBWithOwner(VectorDB, ResourceWithOwner):
class VectorStoreWithOwner(VectorStore, ResourceWithOwner):
pass
@ -91,12 +91,12 @@ class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
pass
RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | ToolGroup
RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup
RoutableObjectWithProvider = Annotated[
ModelWithOwner
| ShieldWithOwner
| VectorDBWithOwner
| VectorStoreWithOwner
| DatasetWithOwner
| ScoringFnWithOwner
| BenchmarkWithOwner
@ -427,7 +427,7 @@ class RegisteredResources(BaseModel):
models: list[ModelInput] = Field(default_factory=list)
shields: list[ShieldInput] = Field(default_factory=list)
vector_dbs: list[VectorDBInput] = Field(default_factory=list)
vector_stores: list[VectorStoreInput] = Field(default_factory=list)
datasets: list[DatasetInput] = Field(default_factory=list)
scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
benchmarks: list[BenchmarkInput] = Field(default_factory=list)


@ -64,7 +64,7 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]:
router_api=Api.tool_runtime,
),
AutoRoutedApiInfo(
routing_table_api=Api.vector_dbs,
routing_table_api=Api.vector_stores,
router_api=Api.vector_io,
),
]


@ -47,7 +47,7 @@ from llama_stack.core.stack import (
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.core.utils.exec import in_notebook
from llama_stack.log import get_logger
from llama_stack.log import get_logger, setup_logging
from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
from llama_stack.strong_typing.inspection import is_unwrapped_body_param
@ -200,6 +200,9 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
skip_logger_removal: bool = False,
):
super().__init__()
# Initialize logging from environment variables first
setup_logging()
# when using the library client, we should not log to console since many
# of our logs are intended for server-side usage
if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None):
@ -278,7 +281,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
else:
prefix = "!" if in_notebook() else ""
cprint(
f"Please run:\n\n{prefix}llama stack build --distro {self.config_path_or_distro_name} --image-type venv\n\n",
f"Please run:\n\n{prefix}llama stack list-deps {self.config_path_or_distro_name} | xargs -L1 uv pip install\n\n",
"yellow",
file=sys.stderr,
)


@ -29,8 +29,8 @@ from llama_stack.apis.scoring_functions import ScoringFunctions
from llama_stack.apis.shields import Shields
from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.core.client import get_client_impl
from llama_stack.core.datatypes import (
@ -82,7 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
Api.inspect: Inspect,
Api.batches: Batches,
Api.vector_io: VectorIO,
Api.vector_dbs: VectorDBs,
Api.vector_stores: VectorStore,
Api.models: Models,
Api.safety: Safety,
Api.shields: Shields,


@ -29,7 +29,7 @@ async def get_routing_table_impl(
from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable
from ..routing_tables.shields import ShieldsRoutingTable
from ..routing_tables.toolgroups import ToolGroupsRoutingTable
from ..routing_tables.vector_dbs import VectorDBsRoutingTable
from ..routing_tables.vector_stores import VectorStoresRoutingTable
api_to_tables = {
"models": ModelsRoutingTable,
@ -38,7 +38,7 @@ async def get_routing_table_impl(
"scoring_functions": ScoringFunctionsRoutingTable,
"benchmarks": BenchmarksRoutingTable,
"tool_groups": ToolGroupsRoutingTable,
"vector_dbs": VectorDBsRoutingTable,
"vector_stores": VectorStoresRoutingTable,
}
if api.value not in api_to_tables:


@ -37,24 +37,24 @@ class ToolRuntimeRouter(ToolRuntime):
async def query(
self,
content: InterleavedContent,
vector_db_ids: list[str],
vector_store_ids: list[str],
query_config: RAGQueryConfig | None = None,
) -> RAGQueryResult:
logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_db_ids}")
logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_store_ids}")
provider = await self.routing_table.get_provider_impl("knowledge_search")
return await provider.query(content, vector_db_ids, query_config)
return await provider.query(content, vector_store_ids, query_config)
async def insert(
self,
documents: list[RAGDocument],
vector_db_id: str,
vector_store_id: str,
chunk_size_in_tokens: int = 512,
) -> None:
logger.debug(
f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
f"ToolRuntimeRouter.RagToolImpl.insert: {vector_store_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
)
provider = await self.routing_table.get_provider_impl("insert_into_memory")
return await provider.insert(documents, vector_db_id, chunk_size_in_tokens)
return await provider.insert(documents, vector_store_id, chunk_size_in_tokens)
def __init__(
self,

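For callers, the `vector_db_id` to `vector_store_id` rename is mechanical. A hedged sketch of driving the renamed interface (the `RAGDocument` field names and import path are assumptions, and `rag_tool` stands in for the router's knowledge-search provider):

```python
# Illustrative sketch only: field names and import path assumed, not confirmed by this diff.
from llama_stack.apis.tools import RAGDocument  # import path assumed

async def index_and_query(rag_tool, vector_store_id: str):
    docs = [RAGDocument(document_id="doc-1", content="Llama Stack routes RAG calls.", metadata={})]
    await rag_tool.insert(documents=docs, vector_store_id=vector_store_id, chunk_size_in_tokens=512)
    return await rag_tool.query(content="How are RAG calls routed?", vector_store_ids=[vector_store_id])
```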

@ -71,25 +71,6 @@ class VectorIORouter(VectorIO):
raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model")
async def register_vector_db(
self,
vector_db_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
vector_db_name: str | None = None,
provider_vector_db_id: str | None = None,
) -> None:
logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}")
await self.routing_table.register_vector_db(
vector_db_id,
embedding_model,
embedding_dimension,
provider_id,
vector_db_name,
provider_vector_db_id,
)
async def insert_chunks(
self,
vector_db_id: str,
@ -165,22 +146,22 @@ class VectorIORouter(VectorIO):
else:
provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]
vector_db_id = f"vs_{uuid.uuid4()}"
registered_vector_db = await self.routing_table.register_vector_db(
vector_db_id=vector_db_id,
vector_store_id = f"vs_{uuid.uuid4()}"
registered_vector_store = await self.routing_table.register_vector_store(
vector_store_id=vector_store_id,
embedding_model=embedding_model,
embedding_dimension=embedding_dimension,
provider_id=provider_id,
provider_vector_db_id=vector_db_id,
vector_db_name=params.name,
provider_vector_store_id=vector_store_id,
vector_store_name=params.name,
)
provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier)
provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier)
# Update model_extra with registered values so provider uses the already-registered vector_db
# Update model_extra with registered values so provider uses the already-registered vector_store
if params.model_extra is None:
params.model_extra = {}
params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id
params.model_extra["provider_id"] = registered_vector_db.provider_id
params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id
params.model_extra["provider_id"] = registered_vector_store.provider_id
if embedding_model is not None:
params.model_extra["embedding_model"] = embedding_model
if embedding_dimension is not None:
@ -198,15 +179,15 @@ class VectorIORouter(VectorIO):
logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}")
# Route to default provider for now - could aggregate from all providers in the future
# call retrieve on each vector store to get the list of vector stores
vector_dbs = await self.routing_table.get_all_with_type("vector_db")
vector_stores = await self.routing_table.get_all_with_type("vector_store")
all_stores = []
for vector_db in vector_dbs:
for vector_store in vector_stores:
try:
provider = await self.routing_table.get_provider_impl(vector_db.identifier)
vector_store = await provider.openai_retrieve_vector_store(vector_db.identifier)
provider = await self.routing_table.get_provider_impl(vector_store.identifier)
vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier)
all_stores.append(vector_store)
except Exception as e:
logger.error(f"Error retrieving vector store {vector_db.identifier}: {e}")
logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}")
continue
# Sort by created_at


@ -41,7 +41,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable
elif api == Api.safety:
return await p.register_shield(obj)
elif api == Api.vector_io:
return await p.register_vector_db(obj)
return await p.register_vector_store(obj)
elif api == Api.datasetio:
return await p.register_dataset(obj)
elif api == Api.scoring:
@ -57,7 +57,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable
async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
api = get_impl_api(p)
if api == Api.vector_io:
return await p.unregister_vector_db(obj.identifier)
return await p.unregister_vector_store(obj.identifier)
elif api == Api.inference:
return await p.unregister_model(obj.identifier)
elif api == Api.safety:
@ -108,7 +108,7 @@ class CommonRoutingTableImpl(RoutingTable):
elif api == Api.safety:
p.shield_store = self
elif api == Api.vector_io:
p.vector_db_store = self
p.vector_store_store = self
elif api == Api.datasetio:
p.dataset_store = self
elif api == Api.scoring:
@ -134,15 +134,15 @@ class CommonRoutingTableImpl(RoutingTable):
from .scoring_functions import ScoringFunctionsRoutingTable
from .shields import ShieldsRoutingTable
from .toolgroups import ToolGroupsRoutingTable
from .vector_dbs import VectorDBsRoutingTable
from .vector_stores import VectorStoresRoutingTable
def apiname_object():
if isinstance(self, ModelsRoutingTable):
return ("Inference", "model")
elif isinstance(self, ShieldsRoutingTable):
return ("Safety", "shield")
elif isinstance(self, VectorDBsRoutingTable):
return ("VectorIO", "vector_db")
elif isinstance(self, VectorStoresRoutingTable):
return ("VectorIO", "vector_store")
elif isinstance(self, DatasetsRoutingTable):
return ("DatasetIO", "dataset")
elif isinstance(self, ScoringFunctionsRoutingTable):


@ -6,15 +6,12 @@
from typing import Any
from pydantic import TypeAdapter
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
from llama_stack.apis.models import ModelType
from llama_stack.apis.resource import ResourceType
# Removed VectorDBs import to avoid exposing public API
# Removed VectorStores import to avoid exposing public API
from llama_stack.apis.vector_io.vector_io import (
OpenAICreateVectorStoreRequestWithExtraBody,
SearchRankingOptions,
VectorStoreChunkingStrategy,
VectorStoreDeleteResponse,
@ -26,7 +23,7 @@ from llama_stack.apis.vector_io.vector_io import (
VectorStoreSearchResponsePage,
)
from llama_stack.core.datatypes import (
VectorDBWithOwner,
VectorStoreWithOwner,
)
from llama_stack.log import get_logger
@ -35,23 +32,23 @@ from .common import CommonRoutingTableImpl, lookup_model
logger = get_logger(name=__name__, category="core::routing_tables")
class VectorDBsRoutingTable(CommonRoutingTableImpl):
"""Internal routing table for vector_db operations.
class VectorStoresRoutingTable(CommonRoutingTableImpl):
"""Internal routing table for vector_store operations.
Does not inherit from VectorDBs to avoid exposing public API endpoints.
Does not inherit from VectorStores to avoid exposing public API endpoints.
Only provides internal routing functionality for VectorIORouter.
"""
# Internal methods only - no public API exposure
async def register_vector_db(
async def register_vector_store(
self,
vector_db_id: str,
vector_store_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
provider_vector_db_id: str | None = None,
vector_db_name: str | None = None,
provider_vector_store_id: str | None = None,
vector_store_name: str | None = None,
) -> Any:
if provider_id is None:
if len(self.impls_by_provider_id) > 0:
@ -67,52 +64,24 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
raise ModelNotFoundError(embedding_model)
if model.model_type != ModelType.embedding:
raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
if "embedding_dimension" not in model.metadata:
raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
try:
provider = self.impls_by_provider_id[provider_id]
except KeyError:
available_providers = list(self.impls_by_provider_id.keys())
raise ValueError(
f"Provider '{provider_id}' not found in routing table. Available providers: {available_providers}"
) from None
logger.warning(
"VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly."
)
request = OpenAICreateVectorStoreRequestWithExtraBody(
name=vector_db_name or vector_db_id,
embedding_model=embedding_model,
embedding_dimension=model.metadata["embedding_dimension"],
vector_store = VectorStoreWithOwner(
identifier=vector_store_id,
type=ResourceType.vector_store.value,
provider_id=provider_id,
provider_vector_db_id=provider_vector_db_id,
provider_resource_id=provider_vector_store_id,
embedding_model=embedding_model,
embedding_dimension=embedding_dimension,
vector_store_name=vector_store_name,
)
vector_store = await provider.openai_create_vector_store(request)
vector_store_id = vector_store.id
actual_provider_vector_db_id = provider_vector_db_id or vector_store_id
logger.warning(
f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name"
)
vector_db_data = {
"identifier": vector_store_id,
"type": ResourceType.vector_db.value,
"provider_id": provider_id,
"provider_resource_id": actual_provider_vector_db_id,
"embedding_model": embedding_model,
"embedding_dimension": model.metadata["embedding_dimension"],
"vector_db_name": vector_store.name,
}
vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
await self.register_object(vector_db)
return vector_db
await self.register_object(vector_store)
return vector_store
async def openai_retrieve_vector_store(
self,
vector_store_id: str,
) -> VectorStoreObject:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store(vector_store_id)
@ -123,7 +92,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
expires_after: dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> VectorStoreObject:
await self.assert_action_allowed("update", "vector_db", vector_store_id)
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store(
vector_store_id=vector_store_id,
@ -136,18 +105,18 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
self,
vector_store_id: str,
) -> VectorStoreDeleteResponse:
await self.assert_action_allowed("delete", "vector_db", vector_store_id)
await self.assert_action_allowed("delete", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
result = await provider.openai_delete_vector_store(vector_store_id)
await self.unregister_vector_db(vector_store_id)
await self.unregister_vector_store(vector_store_id)
return result
async def unregister_vector_db(self, vector_store_id: str) -> None:
async def unregister_vector_store(self, vector_store_id: str) -> None:
"""Remove the vector store from the routing table registry."""
try:
vector_db_obj = await self.get_object_by_identifier("vector_db", vector_store_id)
if vector_db_obj:
await self.unregister_object(vector_db_obj)
vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id)
if vector_store_obj:
await self.unregister_object(vector_store_obj)
except Exception as e:
# Log the error but don't fail the operation
logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}")
@ -162,7 +131,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
rewrite_query: bool | None = False,
search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_search_vector_store(
vector_store_id=vector_store_id,
@ -181,7 +150,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
attributes: dict[str, Any] | None = None,
chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
await self.assert_action_allowed("update", "vector_db", vector_store_id)
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_attach_file_to_vector_store(
vector_store_id=vector_store_id,
@ -199,7 +168,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
before: str | None = None,
filter: VectorStoreFileStatus | None = None,
) -> list[VectorStoreFileObject]:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store(
vector_store_id=vector_store_id,
@ -215,7 +184,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
vector_store_id: str,
file_id: str,
) -> VectorStoreFileObject:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file(
vector_store_id=vector_store_id,
@ -227,7 +196,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
vector_store_id: str,
file_id: str,
) -> VectorStoreFileContentsResponse:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_contents(
vector_store_id=vector_store_id,
@ -240,7 +209,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
file_id: str,
attributes: dict[str, Any],
) -> VectorStoreFileObject:
await self.assert_action_allowed("update", "vector_db", vector_store_id)
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store_file(
vector_store_id=vector_store_id,
@ -253,7 +222,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
vector_store_id: str,
file_id: str,
) -> VectorStoreFileDeleteResponse:
await self.assert_action_allowed("delete", "vector_db", vector_store_id)
await self.assert_action_allowed("delete", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_delete_vector_store_file(
vector_store_id=vector_store_id,
@ -267,7 +236,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
attributes: dict[str, Any] | None = None,
chunking_strategy: Any | None = None,
):
await self.assert_action_allowed("update", "vector_db", vector_store_id)
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_create_vector_store_file_batch(
vector_store_id=vector_store_id,
@ -281,7 +250,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_batch(
batch_id=batch_id,
@ -298,7 +267,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
limit: int | None = 20,
order: str | None = "desc",
):
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store_file_batch(
batch_id=batch_id,
@ -315,7 +284,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("update", "vector_db", vector_store_id)
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_cancel_vector_store_file_batch(
batch_id=batch_id,
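
Every method in this routing-table diff follows the same three-step shape: authorize against the renamed `vector_store` resource type, resolve the owning provider, then delegate. A minimal sketch of that shape, with hypothetical `check` and `resolve` helpers standing in for `assert_action_allowed` and `get_provider_impl`:

```python
# Illustrative sketch only: `check` and `resolve` are hypothetical
# stand-ins for the routing table's real helpers shown in the diff above.
class RoutingSketch:
    async def cancel_file_batch(self, batch_id: str, vector_store_id: str):
        await self.check("update", "vector_store", vector_store_id)  # 1. authorize
        provider = await self.resolve(vector_store_id)               # 2. route
        return await provider.openai_cancel_vector_store_file_batch( # 3. delegate
            batch_id=batch_id, vector_store_id=vector_store_id
        )
```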


@ -56,7 +56,7 @@ from llama_stack.core.stack import (
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.log import get_logger
from llama_stack.log import get_logger, setup_logging
from llama_stack.providers.datatypes import Api
from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (
@ -374,6 +374,9 @@ def create_app() -> StackApp:
Returns:
Configured StackApp instance.
"""
# Initialize logging from environment variables first
setup_logging()
config_file = os.getenv("LLAMA_STACK_CONFIG")
if config_file is None:
raise ValueError("LLAMA_STACK_CONFIG environment variable is required")


@ -9,7 +9,7 @@
1. Start up Llama Stack API server. More details [here](https://llamastack.github.io/latest/getting_started/index.html).
```
llama stack build --distro together --image-type venv
llama stack list-deps together | xargs -L1 uv pip install
llama stack run together
```


@ -32,7 +32,7 @@ def tool_chat_page():
tool_groups_list = [tool_group.identifier for tool_group in tool_groups]
mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")]
builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")]
selected_vector_dbs = []
selected_vector_stores = []
def reset_agent():
st.session_state.clear()
@ -55,13 +55,13 @@ def tool_chat_page():
)
if "builtin::rag" in toolgroup_selection:
vector_dbs = llama_stack_api.client.vector_dbs.list() or []
if not vector_dbs:
vector_stores = llama_stack_api.client.vector_stores.list() or []
if not vector_stores:
st.info("No vector databases available for selection.")
vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
selected_vector_dbs = st.multiselect(
vector_stores = [vector_store.identifier for vector_store in vector_stores]
selected_vector_stores = st.multiselect(
label="Select Document Collections to use in RAG queries",
options=vector_dbs,
options=vector_stores,
on_change=reset_agent,
)
@ -119,7 +119,7 @@ def tool_chat_page():
tool_dict = dict(
name="builtin::rag",
args={
"vector_db_ids": list(selected_vector_dbs),
"vector_store_ids": list(selected_vector_stores),
},
)
toolgroup_selection[i] = tool_dict


@ -157,7 +157,7 @@ docker run \
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
```bash
llama stack build --distro {{ name }} --image-type conda
llama stack list-deps {{ name }} | xargs -L1 pip install
INFERENCE_MODEL=$INFERENCE_MODEL \
DEH_URL=$DEH_URL \
CHROMA_URL=$CHROMA_URL \


@ -166,14 +166,26 @@ class CustomFileHandler(logging.FileHandler):
super().emit(record)
def setup_logging(category_levels: dict[str, int], log_file: str | None) -> None:
def setup_logging(category_levels: dict[str, int] | None = None, log_file: str | None = None) -> None:
"""
Configure logging based on the provided category log levels and an optional log file.
If category_levels or log_file are not provided, they will be read from environment variables.
Parameters:
category_levels (Dict[str, int]): A dictionary mapping categories to their log levels.
log_file (str): Path to a log file to additionally pipe the logs into
category_levels (Dict[str, int] | None): A dictionary mapping categories to their log levels.
If None, reads from LLAMA_STACK_LOGGING environment variable and uses defaults.
log_file (str | None): Path to a log file to additionally pipe the logs into.
If None, reads from LLAMA_STACK_LOG_FILE environment variable.
"""
# Read from environment variables if not explicitly provided
if category_levels is None:
category_levels = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)
env_config = os.environ.get("LLAMA_STACK_LOGGING", "")
if env_config:
category_levels.update(parse_environment_config(env_config))
if log_file is None:
log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
log_format = "%(asctime)s %(name)s:%(lineno)d %(category)s: %(message)s"
class CategoryFilter(logging.Filter):
@ -224,12 +236,30 @@ def setup_logging(category_levels: dict[str, int], log_file: str | None) -> None
}
},
"loggers": {
category: {
"handlers": list(handlers.keys()), # Apply all handlers
"level": category_levels.get(category, DEFAULT_LOG_LEVEL),
"propagate": False, # Disable propagation to root logger
}
for category in CATEGORIES
**{
category: {
"handlers": list(handlers.keys()), # Apply all handlers
"level": category_levels.get(category, DEFAULT_LOG_LEVEL),
"propagate": False, # Disable propagation to root logger
}
for category in CATEGORIES
},
# Explicitly configure uvicorn loggers to preserve their INFO level
"uvicorn": {
"handlers": list(handlers.keys()),
"level": logging.INFO,
"propagate": False,
},
"uvicorn.error": {
"handlers": list(handlers.keys()),
"level": logging.INFO,
"propagate": False,
},
"uvicorn.access": {
"handlers": list(handlers.keys()),
"level": logging.INFO,
"propagate": False,
},
},
"root": {
"handlers": list(handlers.keys()),
@ -238,9 +268,13 @@ def setup_logging(category_levels: dict[str, int], log_file: str | None) -> None
}
dictConfig(logging_config)
# Ensure third-party libraries follow the root log level
for _, logger in logging.root.manager.loggerDict.items():
# Ensure third-party libraries follow the root log level, but preserve
# already-configured loggers (e.g., uvicorn) and our own llama_stack loggers
for name, logger in logging.root.manager.loggerDict.items():
if isinstance(logger, logging.Logger):
# Skip infrastructure loggers (uvicorn, fastapi) and our own loggers
if name.startswith(("uvicorn", "fastapi", "llama_stack")):
continue
logger.setLevel(root_level)
@ -278,12 +312,3 @@ def get_logger(
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
logger.setLevel(log_level)
return logging.LoggerAdapter(logger, {"category": category})
env_config = os.environ.get("LLAMA_STACK_LOGGING", "")
if env_config:
_category_levels.update(parse_environment_config(env_config))
log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
setup_logging(_category_levels, log_file)
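
With the module-level env reading removed, a single zero-argument `setup_logging()` call now covers both code paths. A usage sketch, assuming the documented `category=level;category=level` syntax for `LLAMA_STACK_LOGGING`:

```python
import os

# Assumed syntax: semicolon-separated category=level pairs.
os.environ["LLAMA_STACK_LOGGING"] = "server=debug;core=info"
os.environ["LLAMA_STACK_LOG_FILE"] = "/tmp/llama_stack.log"

from llama_stack.log import get_logger, setup_logging

setup_logging()  # no arguments: levels and log file come from the env vars above
logger = get_logger(name=__name__, category="server")
logger.debug("emitted because server=debug")
```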


@ -17,7 +17,7 @@ from llama_stack.apis.models import Model
from llama_stack.apis.scoring_functions import ScoringFn
from llama_stack.apis.shields import Shield
from llama_stack.apis.tools import ToolGroup
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.schema_utils import json_schema_type
@ -68,10 +68,10 @@ class ShieldsProtocolPrivate(Protocol):
async def unregister_shield(self, identifier: str) -> None: ...
class VectorDBsProtocolPrivate(Protocol):
async def register_vector_db(self, vector_db: VectorDB) -> None: ...
class VectorStoresProtocolPrivate(Protocol):
async def register_vector_store(self, vector_store: VectorStore) -> None: ...
async def unregister_vector_db(self, vector_db_id: str) -> None: ...
async def unregister_vector_store(self, vector_store_id: str) -> None: ...
class DatasetsProtocolPrivate(Protocol):
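
A minimal sketch (not a shipped provider) of what the renamed `VectorStoresProtocolPrivate` asks an implementation to supply:

```python
from llama_stack.apis.vector_stores import VectorStore

class InMemoryVectorStoreProvider:
    """Toy example; real providers also persist to a KV store."""

    def __init__(self) -> None:
        self._stores: dict[str, VectorStore] = {}

    async def register_vector_store(self, vector_store: VectorStore) -> None:
        self._stores[vector_store.identifier] = vector_store

    async def unregister_vector_store(self, vector_store_id: str) -> None:
        self._stores.pop(vector_store_id, None)
```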


@ -1,75 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
from datetime import UTC, datetime
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.sdk.trace.export import SpanProcessor
from opentelemetry.trace.status import StatusCode
from llama_stack.log import get_logger
logger = get_logger(name="console_span_processor", category="telemetry")
class ConsoleSpanProcessor(SpanProcessor):
def __init__(self, print_attributes: bool = False):
self.print_attributes = print_attributes
def on_start(self, span: ReadableSpan, parent_context=None) -> None:
if span.attributes and span.attributes.get("__autotraced__"):
return
timestamp = datetime.fromtimestamp(span.start_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
logger.info(f"[dim]{timestamp}[/dim] [bold magenta][START][/bold magenta] [dim]{span.name}[/dim]")
def on_end(self, span: ReadableSpan) -> None:
timestamp = datetime.fromtimestamp(span.end_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
span_context = f"[dim]{timestamp}[/dim] [bold magenta][END][/bold magenta] [dim]{span.name}[/dim]"
if span.status.status_code == StatusCode.ERROR:
span_context += " [bold red][ERROR][/bold red]"
elif span.status.status_code != StatusCode.UNSET:
span_context += f" [{span.status.status_code}]"
duration_ms = (span.end_time - span.start_time) / 1e6
span_context += f" ({duration_ms:.2f}ms)"
logger.info(span_context)
if self.print_attributes and span.attributes:
for key, value in span.attributes.items():
if key.startswith("__"):
continue
str_value = str(value)
if len(str_value) > 1000:
str_value = str_value[:997] + "..."
logger.info(f" [dim]{key}[/dim]: {str_value}")
for event in span.events:
event_time = datetime.fromtimestamp(event.timestamp / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
severity = event.attributes.get("severity", "info")
message = event.attributes.get("message", event.name)
if isinstance(message, dict) or isinstance(message, list):
message = json.dumps(message, indent=2)
severity_color = {
"error": "red",
"warn": "yellow",
"info": "white",
"debug": "dim",
}.get(severity, "white")
logger.info(f" {event_time} [bold {severity_color}][{severity.upper()}][/bold {severity_color}] {message}")
if event.attributes:
for key, value in event.attributes.items():
if key.startswith("__") or key in ["message", "severity"]:
continue
logger.info(f"[dim]{key}[/dim]: {value}")
def shutdown(self) -> None:
"""Shutdown the processor."""
pass
def force_flush(self, timeout_millis: float | None = None) -> bool:
"""Force flush any pending spans."""
return True


@ -79,8 +79,10 @@ class TelemetryAdapter(Telemetry):
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
metric_provider = MeterProvider(metric_readers=[metric_reader])
metrics.set_meter_provider(metric_provider)
self.is_otel_endpoint_set = True
else:
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry")
self.is_otel_endpoint_set = False
self.meter = metrics.get_meter(__name__)
self._lock = _global_lock
@ -89,7 +91,8 @@ class TelemetryAdapter(Telemetry):
pass
async def shutdown(self) -> None:
trace.get_tracer_provider().force_flush()
if self.is_otel_endpoint_set:
trace.get_tracer_provider().force_flush()
async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
if isinstance(event, UnstructuredLogEvent):
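
The shutdown guard in isolation, as an illustrative stand-alone class rather than the real `TelemetryAdapter`:

```python
# Standalone sketch of the guard introduced above: remember at init time
# whether an OTLP endpoint was configured, and make shutdown a no-op otherwise.
class TelemetrySketch:
    def __init__(self, endpoint: str | None) -> None:
        self.is_otel_endpoint_set = endpoint is not None

    def shutdown(self, tracer_provider) -> None:
        if self.is_otel_endpoint_set:
            tracer_provider.force_flush()
```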


@ -17,21 +17,21 @@ from numpy.typing import NDArray
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
from .config import FaissVectorIOConfig
logger = get_logger(name=__name__, category="vector_io")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::"
FAISS_INDEX_PREFIX = f"faiss_index:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::"
@ -176,28 +176,28 @@ class FaissIndex(EmbeddingIndex):
)
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.cache: dict[str, VectorDBWithIndex] = {}
self.cache: dict[str, VectorStoreWithIndex] = {}
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.persistence)
# Load existing banks from kvstore
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
for vector_db_data in stored_vector_dbs:
vector_db = VectorDB.model_validate_json(vector_db_data)
index = VectorDBWithIndex(
vector_db,
await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier),
for vector_store_data in stored_vector_stores:
vector_store = VectorStore.model_validate_json(vector_store_data)
index = VectorStoreWithIndex(
vector_store,
await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
self.inference_api,
)
self.cache[vector_db.identifier] = index
self.cache[vector_store.identifier] = index
# Load existing OpenAI vector stores into the in-memory cache
await self.initialize_openai_vector_stores()
@ -222,32 +222,31 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
except Exception as e:
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
assert self.kvstore is not None
key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
await self.kvstore.set(key=key, value=vector_db.model_dump_json())
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
# Store in cache
self.cache[vector_db.identifier] = VectorDBWithIndex(
vector_db=vector_db,
index=await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier),
self.cache[vector_store.identifier] = VectorStoreWithIndex(
vector_store=vector_store,
index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
inference_api=self.inference_api,
)
async def list_vector_dbs(self) -> list[VectorDB]:
return [i.vector_db for i in self.cache.values()]
async def list_vector_stores(self) -> list[VectorStore]:
return [i.vector_store for i in self.cache.values()]
async def unregister_vector_db(self, vector_db_id: str) -> None:
async def unregister_vector_store(self, vector_store_id: str) -> None:
assert self.kvstore is not None
if vector_db_id not in self.cache:
logger.warning(f"Vector DB {vector_db_id} not found")
if vector_store_id not in self.cache:
return
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}")
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = self.cache.get(vector_db_id)
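
Independent of Faiss, the persistence idiom above is a pydantic JSON round trip under a versioned key prefix. A self-contained sketch with a stand-in model (`FakeStore` is hypothetical, not the real `VectorStore` schema):

```python
from pydantic import BaseModel

PREFIX = "vector_stores:v3::"

class FakeStore(BaseModel):  # stand-in for VectorStore
    identifier: str
    embedding_dimension: int

store = FakeStore(identifier="vs_123", embedding_dimension=384)
key = f"{PREFIX}{store.identifier}"          # what register_vector_store writes
blob = store.model_dump_json()               # serialized value in the KV store
assert FakeStore.model_validate_json(blob) == store  # what initialize() rebuilds
```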


@ -17,10 +17,10 @@ from numpy.typing import NDArray
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import (
RERANKER_TYPE_RRF,
ChunkForDeletion,
EmbeddingIndex,
VectorDBWithIndex,
VectorStoreWithIndex,
)
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
@ -41,7 +41,7 @@ HYBRID_SEARCH = "hybrid"
SEARCH_MODES = {VECTOR_SEARCH, KEYWORD_SEARCH, HYBRID_SEARCH}
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:sqlite_vec:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:sqlite_vec:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:sqlite_vec:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:sqlite_vec:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:sqlite_vec:{VERSION}::"
@ -374,32 +374,32 @@ class SQLiteVecIndex(EmbeddingIndex):
await asyncio.to_thread(_delete_chunks)
class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
"""
A VectorIO implementation using SQLite + sqlite_vec.
This class handles vector database registration (with metadata stored in a table named `vector_dbs`)
and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex).
This class handles vector database registration (with metadata stored in a table named `vector_stores`)
and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex).
"""
def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.cache: dict[str, VectorDBWithIndex] = {}
self.vector_db_store = None
self.cache: dict[str, VectorStoreWithIndex] = {}
self.vector_store_table = None
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.persistence)
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
for db_json in stored_vector_dbs:
vector_db = VectorDB.model_validate_json(db_json)
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
for db_json in stored_vector_stores:
vector_store = VectorStore.model_validate_json(db_json)
index = await SQLiteVecIndex.create(
vector_db.embedding_dimension, self.config.db_path, vector_db.identifier
vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
)
self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
# Load existing OpenAI vector stores into the in-memory cache
await self.initialize_openai_vector_stores()
@ -408,63 +408,64 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def list_vector_dbs(self) -> list[VectorDB]:
return [v.vector_db for v in self.cache.values()]
async def list_vector_stores(self) -> list[VectorStore]:
return [v.vector_store for v in self.cache.values()]
async def register_vector_db(self, vector_db: VectorDB) -> None:
index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier)
self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
async def register_vector_store(self, vector_store: VectorStore) -> None:
index = await SQLiteVecIndex.create(
vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
)
self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.vector_db_store is None:
raise VectorStoreNotFoundError(vector_db_id)
if self.vector_store_table is None:
raise VectorStoreNotFoundError(vector_store_id)
vector_db = self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
vector_store = self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise VectorStoreNotFoundError(vector_store_id)
index = VectorDBWithIndex(
vector_db=vector_db,
index = VectorStoreWithIndex(
vector_store=vector_store,
index=SQLiteVecIndex(
dimension=vector_db.embedding_dimension,
dimension=vector_store.embedding_dimension,
db_path=self.config.db_path,
bank_id=vector_db.identifier,
bank_id=vector_store.identifier,
kvstore=self.kvstore,
),
inference_api=self.inference_api,
)
self.cache[vector_db_id] = index
self.cache[vector_store_id] = index
return index
async def unregister_vector_db(self, vector_db_id: str) -> None:
if vector_db_id not in self.cache:
logger.warning(f"Vector DB {vector_db_id} not found")
async def unregister_vector_store(self, vector_store_id: str) -> None:
if vector_store_id not in self.cache:
return
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
# The VectorDBWithIndex helper is expected to compute embeddings via the inference_api
# The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api
# and then call our index's add_chunks.
await index.insert_chunks(chunks)
async def query_chunks(
self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete chunks from a sqlite_vec index."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise VectorStoreNotFoundError(store_id)
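
The `initialize()` scan relies on a common KV idiom: because keys share a string prefix, the range from `prefix` to `prefix + "\xff"` covers (approximately, depending on the store's bound semantics) exactly the keys under that prefix. A small illustrative check:

```python
def in_prefix_range(key: str, prefix: str) -> bool:
    # Simplified model of values_in_range(start_key, end_key) above.
    return prefix <= key <= f"{prefix}\xff"

assert in_prefix_range("vector_stores:sqlite_vec:v3::abc", "vector_stores:sqlite_vec:v3::")
assert not in_prefix_range("vector_index:sqlite_vec:v3::abc", "vector_stores:sqlite_vec:v3::")
```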


@ -20,7 +20,7 @@ This provider enables dataset management using NVIDIA's NeMo Customizer service.
Build the NVIDIA environment:
```bash
llama stack build --distro nvidia --image-type venv
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
```
### Basic Usage using the LlamaStack Python Client


@ -18,7 +18,7 @@ This provider enables running inference using NVIDIA NIM.
Build the NVIDIA environment:
```bash
llama stack build --distro nvidia --image-type venv
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
```
### Basic Usage using the LlamaStack Python Client


@ -10,7 +10,7 @@ from .config import NVIDIAConfig
async def get_adapter_impl(config: NVIDIAConfig, _deps) -> Inference:
# import dynamically so `llama stack build` does not fail due to missing dependencies
# import dynamically so `llama stack list-deps` does not fail due to missing dependencies
from .nvidia import NVIDIAInferenceAdapter
if not isinstance(config, NVIDIAConfig):


@ -22,7 +22,7 @@ This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service
Build the NVIDIA environment:
```bash
llama stack build --distro nvidia --image-type venv
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
```
### Basic Usage using the LlamaStack Python Client


@ -19,7 +19,7 @@ This provider enables safety checks and guardrails for LLM interactions using NV
Build the NVIDIA environment:
```bash
llama stack build --distro nvidia --image-type venv
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
```
### Basic Usage using the LlamaStack Python Client


@ -13,15 +13,15 @@ from numpy.typing import NDArray
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
@ -30,7 +30,7 @@ log = get_logger(name=__name__, category="vector_io::chroma")
ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:chroma:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:chroma:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:chroma:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:chroma:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:chroma:{VERSION}::"
@ -114,7 +114,7 @@ class ChromaIndex(EmbeddingIndex):
raise NotImplementedError("Hybrid search is not supported in Chroma")
class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(
self,
config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig,
@ -127,11 +127,11 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self.inference_api = inference_api
self.client = None
self.cache = {}
self.vector_db_store = None
self.vector_store_table = None
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.persistence)
self.vector_db_store = self.kvstore
self.vector_store_table = self.kvstore
if isinstance(self.config, RemoteChromaVectorIOConfig):
log.info(f"Connecting to Chroma server at: {self.config.url}")
@ -151,26 +151,26 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
collection = await maybe_await(
self.client.get_or_create_collection(
name=vector_db.identifier, metadata={"vector_db": vector_db.model_dump_json()}
name=vector_store.identifier, metadata={"vector_store": vector_store.model_dump_json()}
)
)
self.cache[vector_db.identifier] = VectorDBWithIndex(
vector_db, ChromaIndex(self.client, collection), self.inference_api
self.cache[vector_store.identifier] = VectorStoreWithIndex(
vector_store, ChromaIndex(self.client, collection), self.inference_api
)
async def unregister_vector_db(self, vector_db_id: str) -> None:
if vector_db_id not in self.cache:
log.warning(f"Vector DB {vector_db_id} not found")
async def unregister_vector_store(self, vector_store_id: str) -> None:
if vector_store_id not in self.cache:
log.warning(f"Vector DB {vector_store_id} not found")
return
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if index is None:
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
@ -179,30 +179,30 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
async def query_chunks(
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if index is None:
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
return await index.query_chunks(query, params)
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack")
collection = await maybe_await(self.client.get_collection(vector_db_id))
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
collection = await maybe_await(self.client.get_collection(vector_store_id))
if not collection:
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
self.cache[vector_db_id] = index
raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api)
self.cache[vector_store_id] = index
return index
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete chunks from a Chroma vector store."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise ValueError(f"Vector DB {store_id} not found")


@ -14,10 +14,10 @@ from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusC
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
@ -26,7 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import (
RERANKER_TYPE_WEIGHTED,
ChunkForDeletion,
EmbeddingIndex,
VectorDBWithIndex,
VectorStoreWithIndex,
)
from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
@ -35,7 +35,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
logger = get_logger(name=__name__, category="vector_io::milvus")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:milvus:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:milvus:{VERSION}::"
@ -261,7 +261,7 @@ class MilvusIndex(EmbeddingIndex):
raise
class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(
self,
config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig,
@ -273,28 +273,28 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self.cache = {}
self.client = None
self.inference_api = inference_api
self.vector_db_store = None
self.vector_store_table = None
self.metadata_collection_name = "openai_vector_stores_metadata"
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.persistence)
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
for vector_db_data in stored_vector_dbs:
vector_db = VectorDB.model_validate_json(vector_db_data)
index = VectorDBWithIndex(
vector_db,
for vector_store_data in stored_vector_stores:
vector_store = VectorStore.model_validate_json(vector_store_data)
index = VectorStoreWithIndex(
vector_store,
index=MilvusIndex(
client=self.client,
collection_name=vector_db.identifier,
collection_name=vector_store.identifier,
consistency_level=self.config.consistency_level,
kvstore=self.kvstore,
),
inference_api=self.inference_api,
)
self.cache[vector_db.identifier] = index
self.cache[vector_store.identifier] = index
if isinstance(self.config, RemoteMilvusVectorIOConfig):
logger.info(f"Connecting to Milvus server at {self.config.uri}")
self.client = MilvusClient(**self.config.model_dump(exclude_none=True))
@ -311,45 +311,45 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
if isinstance(self.config, RemoteMilvusVectorIOConfig):
consistency_level = self.config.consistency_level
else:
consistency_level = "Strong"
index = VectorDBWithIndex(
vector_db=vector_db,
index=MilvusIndex(self.client, vector_db.identifier, consistency_level=consistency_level),
index = VectorStoreWithIndex(
vector_store=vector_store,
index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level),
inference_api=self.inference_api,
)
self.cache[vector_db.identifier] = index
self.cache[vector_store.identifier] = index
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.vector_db_store is None:
raise VectorStoreNotFoundError(vector_db_id)
if self.vector_store_table is None:
raise VectorStoreNotFoundError(vector_store_id)
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise VectorStoreNotFoundError(vector_store_id)
index = VectorDBWithIndex(
vector_db=vector_db,
index=MilvusIndex(client=self.client, collection_name=vector_db.identifier, kvstore=self.kvstore),
index = VectorStoreWithIndex(
vector_store=vector_store,
index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore),
inference_api=self.inference_api,
)
self.cache[vector_db_id] = index
self.cache[vector_store_id] = index
return index
async def unregister_vector_db(self, vector_db_id: str) -> None:
if vector_db_id in self.cache:
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
async def unregister_vector_store(self, vector_store_id: str) -> None:
if vector_store_id in self.cache:
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
@ -358,14 +358,14 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
async def query_chunks(
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete a chunk from a milvus vector store."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise VectorStoreNotFoundError(store_id)


@ -16,15 +16,15 @@ from pydantic import BaseModel, TypeAdapter
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name
from .config import PGVectorVectorIOConfig
@ -32,7 +32,7 @@ from .config import PGVectorVectorIOConfig
log = get_logger(name=__name__, category="vector_io::pgvector")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:pgvector:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:pgvector:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:pgvector:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:pgvector:{VERSION}::"
@ -79,13 +79,13 @@ class PGVectorIndex(EmbeddingIndex):
def __init__(
self,
vector_db: VectorDB,
vector_store: VectorStore,
dimension: int,
conn: psycopg2.extensions.connection,
kvstore: KVStore | None = None,
distance_metric: str = "COSINE",
):
self.vector_db = vector_db
self.vector_store = vector_store
self.dimension = dimension
self.conn = conn
self.kvstore = kvstore
@ -97,9 +97,9 @@ class PGVectorIndex(EmbeddingIndex):
try:
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
# Sanitize the table name by replacing hyphens with underscores
# SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens
# SQL doesn't allow hyphens in table names, and vector_store.identifier may contain hyphens
# when created with patterns like "test-vector-db-{uuid4()}"
sanitized_identifier = sanitize_collection_name(self.vector_db.identifier)
sanitized_identifier = sanitize_collection_name(self.vector_store.identifier)
self.table_name = f"vs_{sanitized_identifier}"
cur.execute(
@ -122,8 +122,8 @@ class PGVectorIndex(EmbeddingIndex):
"""
)
except Exception as e:
log.exception(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}")
raise RuntimeError(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") from e
log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}")
raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e
async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
assert len(chunks) == len(embeddings), (
@ -323,7 +323,7 @@ class PGVectorIndex(EmbeddingIndex):
)
class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(
self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None
) -> None:
@ -332,7 +332,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
self.inference_api = inference_api
self.conn = None
self.cache = {}
self.vector_db_store = None
self.vector_store_table = None
self.metadata_collection_name = "openai_vector_stores_metadata"
async def initialize(self) -> None:
@ -375,59 +375,59 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
# Persist vector DB metadata in the KV store
assert self.kvstore is not None
# Upsert model metadata in Postgres
upsert_models(self.conn, [(vector_db.identifier, vector_db)])
upsert_models(self.conn, [(vector_store.identifier, vector_store)])
# Create and cache the PGVector index table for the vector DB
pgvector_index = PGVectorIndex(
vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore
vector_store=vector_store, dimension=vector_store.embedding_dimension, conn=self.conn, kvstore=self.kvstore
)
await pgvector_index.initialize()
index = VectorDBWithIndex(vector_db, index=pgvector_index, inference_api=self.inference_api)
self.cache[vector_db.identifier] = index
index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api)
self.cache[vector_store.identifier] = index
async def unregister_vector_db(self, vector_db_id: str) -> None:
async def unregister_vector_store(self, vector_store_id: str) -> None:
# Remove provider index and cache
if vector_db_id in self.cache:
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
if vector_store_id in self.cache:
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
# Delete vector DB metadata from KV store
assert self.kvstore is not None
await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_db_id}")
await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
await index.insert_chunks(chunks)
async def query_chunks(
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
return await index.query_chunks(query, params)
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.vector_db_store is None:
raise VectorStoreNotFoundError(vector_db_id)
if self.vector_store_table is None:
raise VectorStoreNotFoundError(vector_store_id)
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise VectorStoreNotFoundError(vector_store_id)
index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn)
await index.initialize()
self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
return self.cache[vector_db_id]
self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api)
return self.cache[vector_store_id]
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete a chunk from a PostgreSQL vector store."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise VectorStoreNotFoundError(store_id)
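
The hyphen-to-underscore sanitization called out in the comments above, reduced to its core idea; the real helper is `sanitize_collection_name` from `llama_stack.providers.utils.vector_io.vector_utils`, and this stand-alone version is only illustrative:

```python
def sanitize_for_sql(identifier: str) -> str:
    # SQL table names cannot contain hyphens, but identifiers like
    # "test-vector-db-{uuid4()}" can; normalize them before use.
    return identifier.replace("-", "_")

assert sanitize_for_sql("test-vector-db-1234") == "test_vector_db_1234"
```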


@ -16,7 +16,6 @@ from qdrant_client.models import PointStruct
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
Chunk,
QueryChunksResponse,
@ -24,12 +23,13 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategy,
VectorStoreFileObject,
)
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
@ -38,7 +38,7 @@ CHUNK_ID_KEY = "_chunk_id"
# KV store prefixes for vector databases
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:qdrant:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:qdrant:{VERSION}::"
def convert_id(_id: str) -> str:
@ -145,7 +145,7 @@ class QdrantIndex(EmbeddingIndex):
await self.client.delete_collection(collection_name=self.collection_name)
class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(
self,
config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig,
@ -157,7 +157,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self.client: AsyncQdrantClient = None
self.cache = {}
self.inference_api = inference_api
self.vector_db_store = None
self.vector_store_table = None
self._qdrant_lock = asyncio.Lock()
async def initialize(self) -> None:
@ -167,12 +167,14 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
for vector_db_data in stored_vector_dbs:
vector_db = VectorDB.model_validate_json(vector_db_data)
index = VectorDBWithIndex(vector_db, QdrantIndex(self.client, vector_db.identifier), self.inference_api)
self.cache[vector_db.identifier] = index
for vector_store_data in stored_vector_stores:
vector_store = VectorStore.model_validate_json(vector_store_data)
index = VectorStoreWithIndex(
vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api
)
self.cache[vector_store.identifier] = index
self.openai_vector_stores = await self._load_openai_vector_stores()
async def shutdown(self) -> None:
@ -180,46 +182,48 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
assert self.kvstore is not None
key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
await self.kvstore.set(key=key, value=vector_db.model_dump_json())
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
index = VectorDBWithIndex(
vector_db=vector_db, index=QdrantIndex(self.client, vector_db.identifier), inference_api=self.inference_api
)
self.cache[vector_db.identifier] = index
async def unregister_vector_db(self, vector_db_id: str) -> None:
if vector_db_id in self.cache:
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
assert self.kvstore is not None
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}")
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
if self.vector_db_store is None:
raise ValueError(f"Vector DB not found {vector_db_id}")
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
index = VectorDBWithIndex(
vector_db=vector_db,
index=QdrantIndex(client=self.client, collection_name=vector_db.identifier),
index = VectorStoreWithIndex(
vector_store=vector_store,
index=QdrantIndex(self.client, vector_store.identifier),
inference_api=self.inference_api,
)
self.cache[vector_db_id] = index
self.cache[vector_store.identifier] = index
async def unregister_vector_store(self, vector_store_id: str) -> None:
if vector_store_id in self.cache:
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
assert self.kvstore is not None
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.vector_store_table is None:
raise ValueError(f"Vector DB not found {vector_store_id}")
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise VectorStoreNotFoundError(vector_store_id)
index = VectorStoreWithIndex(
vector_store=vector_store,
index=QdrantIndex(client=self.client, collection_name=vector_store.identifier),
inference_api=self.inference_api,
)
self.cache[vector_store_id] = index
return index
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
@ -228,7 +232,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
async def query_chunks(
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
@ -249,7 +253,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete chunks from a Qdrant vector store."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise ValueError(f"Vector DB {store_id} not found")


@ -16,11 +16,11 @@ from llama_stack.apis.common.content_types import InterleavedContent
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import (
RERANKER_TYPE_RRF,
ChunkForDeletion,
EmbeddingIndex,
VectorDBWithIndex,
VectorStoreWithIndex,
)
from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
@ -37,7 +37,7 @@ from .config import WeaviateVectorIOConfig
log = get_logger(name=__name__, category="vector_io::weaviate")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:weaviate:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:weaviate:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:weaviate:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:weaviate:{VERSION}::"
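Note that while the constant keeps its old name here, its value moves to a new key namespace, so range scans over the new prefix will not see entries persisted under the old one. A small illustration (variable names and store id are illustrative; whether the PR migrates old keys is not shown in this hunk):

```python
VERSION = "v3"
OLD_PREFIX = f"vector_dbs:weaviate:{VERSION}::"     # value before this change
NEW_PREFIX = f"vector_stores:weaviate:{VERSION}::"  # value after it

store_id = "vs_123"  # hypothetical
print(OLD_PREFIX + store_id)  # vector_dbs:weaviate:v3::vs_123
print(NEW_PREFIX + store_id)  # vector_stores:weaviate:v3::vs_123
# values_in_range over NEW_PREFIX will not return entries written under
# OLD_PREFIX, so stores registered before this change would need to be
# re-registered or migrated.
```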
@ -257,14 +257,14 @@ class WeaviateIndex(EmbeddingIndex):
return QueryChunksResponse(chunks=chunks, scores=scores)
class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorDBsProtocolPrivate):
class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate):
def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.client_cache = {}
self.cache = {}
self.vector_db_store = None
self.vector_store_table = None
self.metadata_collection_name = "openai_vector_stores_metadata"
def _get_client(self) -> weaviate.WeaviateClient:
@ -300,11 +300,11 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored = await self.kvstore.values_in_range(start_key, end_key)
for raw in stored:
vector_db = VectorDB.model_validate_json(raw)
vector_store = VectorStore.model_validate_json(raw)
client = self._get_client()
idx = WeaviateIndex(client=client, collection_name=vector_db.identifier, kvstore=self.kvstore)
self.cache[vector_db.identifier] = VectorDBWithIndex(
vector_db=vector_db, index=idx, inference_api=self.inference_api
idx = WeaviateIndex(client=client, collection_name=vector_store.identifier, kvstore=self.kvstore)
self.cache[vector_store.identifier] = VectorStoreWithIndex(
vector_store=vector_store, index=idx, inference_api=self.inference_api
)
# Load OpenAI vector stores metadata into cache
@ -316,9 +316,9 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
client = self._get_client()
sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True)
sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)
# Create collection if it doesn't exist
if not client.collections.exists(sanitized_collection_name):
client.collections.create(
@ -329,45 +329,45 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
],
)
self.cache[vector_db.identifier] = VectorDBWithIndex(
vector_db, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api
self.cache[vector_store.identifier] = VectorStoreWithIndex(
vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api
)
async def unregister_vector_db(self, vector_db_id: str) -> None:
async def unregister_vector_store(self, vector_store_id: str) -> None:
client = self._get_client()
sanitized_collection_name = sanitize_collection_name(vector_db_id, weaviate_format=True)
if vector_db_id not in self.cache or client.collections.exists(sanitized_collection_name) is False:
sanitized_collection_name = sanitize_collection_name(vector_store_id, weaviate_format=True)
if vector_store_id not in self.cache or client.collections.exists(sanitized_collection_name) is False:
return
client.collections.delete(sanitized_collection_name)
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.vector_db_store is None:
raise VectorStoreNotFoundError(vector_db_id)
if self.vector_store_table is None:
raise VectorStoreNotFoundError(vector_store_id)
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise VectorStoreNotFoundError(vector_store_id)
client = self._get_client()
sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True)
sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)
if not client.collections.exists(sanitized_collection_name):
raise ValueError(f"Collection with name `{sanitized_collection_name}` not found")
index = VectorDBWithIndex(
vector_db=vector_db,
index=WeaviateIndex(client=client, collection_name=vector_db.identifier),
index = VectorStoreWithIndex(
vector_store=vector_store,
index=WeaviateIndex(client=client, collection_name=vector_store.identifier),
inference_api=self.inference_api,
)
self.cache[vector_db_id] = index
self.cache[vector_store_id] = index
return index
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
@ -376,14 +376,14 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
async def query_chunks(
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise ValueError(f"Vector DB {store_id} not found")


@ -6,9 +6,12 @@
import asyncio
import base64
import platform
import struct
from typing import TYPE_CHECKING
import torch
from llama_stack.log import get_logger
if TYPE_CHECKING:
@ -24,6 +27,8 @@ from llama_stack.apis.inference import (
EMBEDDING_MODELS = {}
DARWIN = "Darwin"
log = get_logger(name=__name__, category="providers::utils")
@ -83,6 +88,13 @@ class SentenceTransformerEmbeddingMixin:
def _load_model():
from sentence_transformers import SentenceTransformer
platform_name = platform.system()
if platform_name == DARWIN:
# PyTorch's OpenMP kernels can segfault on macOS when spawned from background
# threads with the default parallel settings, so force a single-threaded CPU run.
log.debug(f"Constraining torch threads on {platform_name} to a single worker")
torch.set_num_threads(1)
return SentenceTransformer(model, trust_remote_code=True)
loaded_model = await asyncio.to_thread(_load_model)
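The Darwin guard above works around a macOS crash when PyTorch kernels run on a background thread; a self-contained sketch of the same pattern (the model id is illustrative, not from the diff):

```python
import asyncio
import platform

def _load_model(model_id: str):
    import torch
    from sentence_transformers import SentenceTransformer

    if platform.system() == "Darwin":
        # PyTorch's OpenMP kernels can segfault on macOS when invoked from a
        # background thread with default parallelism, so pin to one thread.
        torch.set_num_threads(1)
    return SentenceTransformer(model_id, trust_remote_code=True)

async def load_embedder(model_id: str = "all-MiniLM-L6-v2"):  # illustrative model
    # asyncio.to_thread runs the loader on a worker thread -- exactly the
    # background-thread path the Darwin guard targets.
    return await asyncio.to_thread(_load_model, model_id)
```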


@ -17,7 +17,6 @@ from pydantic import TypeAdapter
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files, OpenAIFileObject
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
Chunk,
OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
@ -43,6 +42,7 @@ from llama_stack.apis.vector_io import (
VectorStoreSearchResponse,
VectorStoreSearchResponsePage,
)
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.core.id_generation import generate_object_id
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore.api import KVStore
@ -63,7 +63,7 @@ MAX_CONCURRENT_FILES_PER_BATCH = 3 # Maximum concurrent file processing within
FILE_BATCH_CHUNK_SIZE = 10 # Process files in chunks of this size
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:{VERSION}::"
@ -321,12 +321,12 @@ class OpenAIVectorStoreMixin(ABC):
pass
@abstractmethod
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
"""Register a vector database (provider-specific implementation)."""
pass
@abstractmethod
async def unregister_vector_db(self, vector_db_id: str) -> None:
async def unregister_vector_store(self, vector_store_id: str) -> None:
"""Unregister a vector database (provider-specific implementation)."""
pass
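Providers satisfy the mixin by implementing the renamed pair; a minimal in-memory sketch (only the two signatures come from the diff; the class, its state, and the super().__init__ arguments mirror the adapters above but are otherwise illustrative, and the mixin's other abstract members are elided):

```python
from llama_stack.apis.vector_stores import VectorStore

class InMemoryVectorIOAdapter(OpenAIVectorStoreMixin):  # remaining abstract methods elided
    def __init__(self) -> None:
        super().__init__(files_api=None, kvstore=None)
        self._stores: dict[str, VectorStore] = {}

    async def register_vector_store(self, vector_store: VectorStore) -> None:
        # Provider-specific setup (index/collection creation) would go here.
        self._stores[vector_store.identifier] = vector_store

    async def unregister_vector_store(self, vector_store_id: str) -> None:
        # Mirror of register: tear down provider-side state for this store.
        self._stores.pop(vector_store_id, None)
```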
@ -358,7 +358,7 @@ class OpenAIVectorStoreMixin(ABC):
extra_body = params.model_extra or {}
metadata = params.metadata or {}
provider_vector_db_id = extra_body.get("provider_vector_db_id")
provider_vector_store_id = extra_body.get("provider_vector_store_id")
# Use embedding info from metadata if available, otherwise from extra_body
if metadata.get("embedding_model"):
@ -370,16 +370,6 @@ class OpenAIVectorStoreMixin(ABC):
logger.debug(
f"Using embedding config from metadata (takes precedence over extra_body): model='{embedding_model}', dimension={embedding_dimension}"
)
# Check for conflicts with extra_body
if extra_body.get("embedding_model") and extra_body["embedding_model"] != embedding_model:
raise ValueError(
f"Embedding model inconsistent between metadata ('{embedding_model}') and extra_body ('{extra_body['embedding_model']}')"
)
if extra_body.get("embedding_dimension") and extra_body["embedding_dimension"] != embedding_dimension:
raise ValueError(
f"Embedding dimension inconsistent between metadata ({embedding_dimension}) and extra_body ({extra_body['embedding_dimension']})"
)
else:
embedding_model = extra_body.get("embedding_model")
embedding_dimension = extra_body.get("embedding_dimension", EMBEDDING_DIMENSION)
@ -389,8 +379,8 @@ class OpenAIVectorStoreMixin(ABC):
# use provider_id set by router; fallback to provider's own ID when used directly via --stack-config
provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None)
# Derive the canonical vector_db_id (allow override, else generate)
vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}")
# Derive the canonical vector_store_id (allow override, else generate)
vector_store_id = provider_vector_store_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}")
if embedding_model is None:
raise ValueError("embedding_model is required")
@ -398,19 +388,20 @@ class OpenAIVectorStoreMixin(ABC):
if embedding_dimension is None:
raise ValueError("Embedding dimension is required")
# Register the VectorDB backing this vector store
# Register the VectorStore backing this vector store
if provider_id is None:
raise ValueError("Provider ID is required but was not provided")
vector_db = VectorDB(
identifier=vector_db_id,
# call to the provider to create any index, etc.
vector_store = VectorStore(
identifier=vector_store_id,
embedding_dimension=embedding_dimension,
embedding_model=embedding_model,
provider_id=provider_id,
provider_resource_id=vector_db_id,
vector_db_name=params.name,
provider_resource_id=vector_store_id,
vector_store_name=params.name,
)
await self.register_vector_db(vector_db)
await self.register_vector_store(vector_store)
# Create OpenAI vector store metadata
status = "completed"
@ -424,7 +415,7 @@ class OpenAIVectorStoreMixin(ABC):
total=0,
)
store_info: dict[str, Any] = {
"id": vector_db_id,
"id": vector_store_id,
"object": "vector_store",
"created_at": created_at,
"name": params.name,
@ -441,23 +432,23 @@ class OpenAIVectorStoreMixin(ABC):
# Add provider information to metadata if provided
if provider_id:
metadata["provider_id"] = provider_id
if provider_vector_db_id:
metadata["provider_vector_db_id"] = provider_vector_db_id
if provider_vector_store_id:
metadata["provider_vector_store_id"] = provider_vector_store_id
store_info["metadata"] = metadata
# Save to persistent storage (provider-specific)
await self._save_openai_vector_store(vector_db_id, store_info)
await self._save_openai_vector_store(vector_store_id, store_info)
# Store in memory cache
self.openai_vector_stores[vector_db_id] = store_info
self.openai_vector_stores[vector_store_id] = store_info
# Now that our vector store is created, attach any files that were provided
file_ids = params.file_ids or []
tasks = [self.openai_attach_file_to_vector_store(vector_db_id, file_id) for file_id in file_ids]
tasks = [self.openai_attach_file_to_vector_store(vector_store_id, file_id) for file_id in file_ids]
await asyncio.gather(*tasks)
# Get the updated store info and return it
store_info = self.openai_vector_stores[vector_db_id]
store_info = self.openai_vector_stores[vector_store_id]
return VectorStoreObject.model_validate(store_info)
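From the client side, the renamed extras read as follows; a hedged sketch (endpoint, model, and the override id are illustrative; the three extra_body keys are the ones parsed above):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="dummy")  # illustrative local stack

vs = client.vector_stores.create(
    name="docs",
    extra_body={
        # All three keys are optional; names match the extras handled above.
        "embedding_model": "nomic-ai/nomic-embed-text-v1.5",
        "embedding_dimension": 768,
        "provider_vector_store_id": "vs_precreated",  # overrides the generated vs_<uuid>
    },
)
print(vs.id)  # equals the override when one was supplied
```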
async def openai_list_vector_stores(
@ -567,7 +558,7 @@ class OpenAIVectorStoreMixin(ABC):
# Also delete the underlying vector DB
try:
await self.unregister_vector_db(vector_store_id)
await self.unregister_vector_store(vector_store_id)
except Exception as e:
logger.warning(f"Failed to delete underlying vector DB {vector_store_id}: {e}")


@ -23,8 +23,8 @@ from llama_stack.apis.common.content_types import (
)
from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody
from llama_stack.apis.tools import RAGDocument
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
from llama_stack.providers.datatypes import Api
@ -187,7 +187,7 @@ def make_overlapped_chunks(
updated_timestamp=int(time.time()),
chunk_window=chunk_window,
chunk_tokenizer=default_tokenizer,
chunk_embedding_model=None, # This will be set in `VectorDBWithIndex.insert_chunks`
chunk_embedding_model=None, # This will be set in `VectorStoreWithIndex.insert_chunks`
content_token_count=len(toks),
metadata_token_count=len(metadata_tokens),
)
@ -255,8 +255,8 @@ class EmbeddingIndex(ABC):
@dataclass
class VectorDBWithIndex:
vector_db: VectorDB
class VectorStoreWithIndex:
vector_store: VectorStore
index: EmbeddingIndex
inference_api: Api.inference
@ -269,14 +269,14 @@ class VectorDBWithIndex:
if c.embedding is None:
chunks_to_embed.append(c)
if c.chunk_metadata:
c.chunk_metadata.chunk_embedding_model = self.vector_db.embedding_model
c.chunk_metadata.chunk_embedding_dimension = self.vector_db.embedding_dimension
c.chunk_metadata.chunk_embedding_model = self.vector_store.embedding_model
c.chunk_metadata.chunk_embedding_dimension = self.vector_store.embedding_dimension
else:
_validate_embedding(c.embedding, i, self.vector_db.embedding_dimension)
_validate_embedding(c.embedding, i, self.vector_store.embedding_dimension)
if chunks_to_embed:
params = OpenAIEmbeddingsRequestWithExtraBody(
model=self.vector_db.embedding_model,
model=self.vector_store.embedding_model,
input=[c.content for c in chunks_to_embed],
)
resp = await self.inference_api.openai_embeddings(params)
@ -319,7 +319,7 @@ class VectorDBWithIndex:
return await self.index.query_keyword(query_string, k, score_threshold)
params = OpenAIEmbeddingsRequestWithExtraBody(
model=self.vector_db.embedding_model,
model=self.vector_store.embedding_model,
input=[query_string],
)
embeddings_response = await self.inference_api.openai_embeddings(params)


@ -18,7 +18,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^12.23.24",
"llama-stack-client": "^0.2.23",
"llama-stack-client": "^0.3.0",
"lucide-react": "^0.545.0",
"next": "15.5.4",
"next-auth": "^4.24.11",
@ -75,20 +75,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/@ampproject/remapping": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz",
"integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"@jridgewell/gen-mapping": "^0.3.5",
"@jridgewell/trace-mapping": "^0.3.24"
},
"engines": {
"node": ">=6.0.0"
}
},
"node_modules/@asamuzakjp/css-color": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-3.2.0.tgz",
@ -2072,9 +2058,9 @@
}
},
"node_modules/@jridgewell/sourcemap-codec": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz",
"integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==",
"version": "1.5.5",
"resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz",
"integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==",
"dev": true,
"license": "MIT"
},
@ -3199,61 +3185,54 @@
}
},
"node_modules/@tailwindcss/node": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.6.tgz",
"integrity": "sha512-ed6zQbgmKsjsVvodAS1q1Ld2BolEuxJOSyyNc+vhkjdmfNUDCmQnlXBfQkHrlzNmslxHsQU/bFmzcEbv4xXsLg==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.1.14.tgz",
"integrity": "sha512-hpz+8vFk3Ic2xssIA3e01R6jkmsAhvkQdXlEbRTk6S10xDAtiQiM3FyvZVGsucefq764euO/b8WUW9ysLdThHw==",
"dev": true,
"license": "MIT",
"dependencies": {
"@ampproject/remapping": "^2.3.0",
"enhanced-resolve": "^5.18.1",
"jiti": "^2.4.2",
"lightningcss": "1.29.2",
"magic-string": "^0.30.17",
"@jridgewell/remapping": "^2.3.4",
"enhanced-resolve": "^5.18.3",
"jiti": "^2.6.0",
"lightningcss": "1.30.1",
"magic-string": "^0.30.19",
"source-map-js": "^1.2.1",
"tailwindcss": "4.1.6"
"tailwindcss": "4.1.14"
}
},
"node_modules/@tailwindcss/node/node_modules/tailwindcss": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
"integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
"dev": true,
"license": "MIT"
},
"node_modules/@tailwindcss/oxide": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.6.tgz",
"integrity": "sha512-0bpEBQiGx+227fW4G0fLQ8vuvyy5rsB1YIYNapTq3aRsJ9taF3f5cCaovDjN5pUGKKzcpMrZst/mhNaKAPOHOA==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.14.tgz",
"integrity": "sha512-23yx+VUbBwCg2x5XWdB8+1lkPajzLmALEfMb51zZUBYaYVPDQvBSD/WYDqiVyBIo2BZFa3yw1Rpy3G2Jp+K0dw==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"detect-libc": "^2.0.4",
"tar": "^7.4.3"
"tar": "^7.5.1"
},
"engines": {
"node": ">= 10"
},
"optionalDependencies": {
"@tailwindcss/oxide-android-arm64": "4.1.6",
"@tailwindcss/oxide-darwin-arm64": "4.1.6",
"@tailwindcss/oxide-darwin-x64": "4.1.6",
"@tailwindcss/oxide-freebsd-x64": "4.1.6",
"@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.6",
"@tailwindcss/oxide-linux-arm64-gnu": "4.1.6",
"@tailwindcss/oxide-linux-arm64-musl": "4.1.6",
"@tailwindcss/oxide-linux-x64-gnu": "4.1.6",
"@tailwindcss/oxide-linux-x64-musl": "4.1.6",
"@tailwindcss/oxide-wasm32-wasi": "4.1.6",
"@tailwindcss/oxide-win32-arm64-msvc": "4.1.6",
"@tailwindcss/oxide-win32-x64-msvc": "4.1.6"
"@tailwindcss/oxide-android-arm64": "4.1.14",
"@tailwindcss/oxide-darwin-arm64": "4.1.14",
"@tailwindcss/oxide-darwin-x64": "4.1.14",
"@tailwindcss/oxide-freebsd-x64": "4.1.14",
"@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.14",
"@tailwindcss/oxide-linux-arm64-gnu": "4.1.14",
"@tailwindcss/oxide-linux-arm64-musl": "4.1.14",
"@tailwindcss/oxide-linux-x64-gnu": "4.1.14",
"@tailwindcss/oxide-linux-x64-musl": "4.1.14",
"@tailwindcss/oxide-wasm32-wasi": "4.1.14",
"@tailwindcss/oxide-win32-arm64-msvc": "4.1.14",
"@tailwindcss/oxide-win32-x64-msvc": "4.1.14"
}
},
"node_modules/@tailwindcss/oxide-android-arm64": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.6.tgz",
"integrity": "sha512-VHwwPiwXtdIvOvqT/0/FLH/pizTVu78FOnI9jQo64kSAikFSZT7K4pjyzoDpSMaveJTGyAKvDjuhxJxKfmvjiQ==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.14.tgz",
"integrity": "sha512-a94ifZrGwMvbdeAxWoSuGcIl6/DOP5cdxagid7xJv6bwFp3oebp7y2ImYsnZBMTwjn5Ev5xESvS3FFYUGgPODQ==",
"cpu": [
"arm64"
],
@ -3268,9 +3247,9 @@
}
},
"node_modules/@tailwindcss/oxide-darwin-arm64": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.6.tgz",
"integrity": "sha512-weINOCcqv1HVBIGptNrk7c6lWgSFFiQMcCpKM4tnVi5x8OY2v1FrV76jwLukfT6pL1hyajc06tyVmZFYXoxvhQ==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.14.tgz",
"integrity": "sha512-HkFP/CqfSh09xCnrPJA7jud7hij5ahKyWomrC3oiO2U9i0UjP17o9pJbxUN0IJ471GTQQmzwhp0DEcpbp4MZTA==",
"cpu": [
"arm64"
],
@ -3285,9 +3264,9 @@
}
},
"node_modules/@tailwindcss/oxide-darwin-x64": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.6.tgz",
"integrity": "sha512-3FzekhHG0ww1zQjQ1lPoq0wPrAIVXAbUkWdWM8u5BnYFZgb9ja5ejBqyTgjpo5mfy0hFOoMnMuVDI+7CXhXZaQ==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.14.tgz",
"integrity": "sha512-eVNaWmCgdLf5iv6Qd3s7JI5SEFBFRtfm6W0mphJYXgvnDEAZ5sZzqmI06bK6xo0IErDHdTA5/t7d4eTfWbWOFw==",
"cpu": [
"x64"
],
@ -3302,9 +3281,9 @@
}
},
"node_modules/@tailwindcss/oxide-freebsd-x64": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.6.tgz",
"integrity": "sha512-4m5F5lpkBZhVQJq53oe5XgJ+aFYWdrgkMwViHjRsES3KEu2m1udR21B1I77RUqie0ZYNscFzY1v9aDssMBZ/1w==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.14.tgz",
"integrity": "sha512-QWLoRXNikEuqtNb0dhQN6wsSVVjX6dmUFzuuiL09ZeXju25dsei2uIPl71y2Ic6QbNBsB4scwBoFnlBfabHkEw==",
"cpu": [
"x64"
],
@ -3319,9 +3298,9 @@
}
},
"node_modules/@tailwindcss/oxide-linux-arm-gnueabihf": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.6.tgz",
"integrity": "sha512-qU0rHnA9P/ZoaDKouU1oGPxPWzDKtIfX7eOGi5jOWJKdxieUJdVV+CxWZOpDWlYTd4N3sFQvcnVLJWJ1cLP5TA==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.14.tgz",
"integrity": "sha512-VB4gjQni9+F0VCASU+L8zSIyjrLLsy03sjcR3bM0V2g4SNamo0FakZFKyUQ96ZVwGK4CaJsc9zd/obQy74o0Fw==",
"cpu": [
"arm"
],
@ -3336,9 +3315,9 @@
}
},
"node_modules/@tailwindcss/oxide-linux-arm64-gnu": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.6.tgz",
"integrity": "sha512-jXy3TSTrbfgyd3UxPQeXC3wm8DAgmigzar99Km9Sf6L2OFfn/k+u3VqmpgHQw5QNfCpPe43em6Q7V76Wx7ogIQ==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.14.tgz",
"integrity": "sha512-qaEy0dIZ6d9vyLnmeg24yzA8XuEAD9WjpM5nIM1sUgQ/Zv7cVkharPDQcmm/t/TvXoKo/0knI3me3AGfdx6w1w==",
"cpu": [
"arm64"
],
@ -3353,9 +3332,9 @@
}
},
"node_modules/@tailwindcss/oxide-linux-arm64-musl": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.6.tgz",
"integrity": "sha512-8kjivE5xW0qAQ9HX9reVFmZj3t+VmljDLVRJpVBEoTR+3bKMnvC7iLcoSGNIUJGOZy1mLVq7x/gerVg0T+IsYw==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.14.tgz",
"integrity": "sha512-ISZjT44s59O8xKsPEIesiIydMG/sCXoMBCqsphDm/WcbnuWLxxb+GcvSIIA5NjUw6F8Tex7s5/LM2yDy8RqYBQ==",
"cpu": [
"arm64"
],
@ -3370,9 +3349,9 @@
}
},
"node_modules/@tailwindcss/oxide-linux-x64-gnu": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.6.tgz",
"integrity": "sha512-A4spQhwnWVpjWDLXnOW9PSinO2PTKJQNRmL/aIl2U/O+RARls8doDfs6R41+DAXK0ccacvRyDpR46aVQJJCoCg==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.1.14.tgz",
"integrity": "sha512-02c6JhLPJj10L2caH4U0zF8Hji4dOeahmuMl23stk0MU1wfd1OraE7rOloidSF8W5JTHkFdVo/O7uRUJJnUAJg==",
"cpu": [
"x64"
],
@ -3387,9 +3366,9 @@
}
},
"node_modules/@tailwindcss/oxide-linux-x64-musl": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.6.tgz",
"integrity": "sha512-YRee+6ZqdzgiQAHVSLfl3RYmqeeaWVCk796MhXhLQu2kJu2COHBkqlqsqKYx3p8Hmk5pGCQd2jTAoMWWFeyG2A==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.14.tgz",
"integrity": "sha512-TNGeLiN1XS66kQhxHG/7wMeQDOoL0S33x9BgmydbrWAb9Qw0KYdd8o1ifx4HOGDWhVmJ+Ul+JQ7lyknQFilO3Q==",
"cpu": [
"x64"
],
@ -3404,9 +3383,9 @@
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.1.6.tgz",
"integrity": "sha512-qAp4ooTYrBQ5pk5jgg54/U1rCJ/9FLYOkkQ/nTE+bVMseMfB6O7J8zb19YTpWuu4UdfRf5zzOrNKfl6T64MNrQ==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.1.14.tgz",
"integrity": "sha512-uZYAsaW/jS/IYkd6EWPJKW/NlPNSkWkBlaeVBi/WsFQNP05/bzkebUL8FH1pdsqx4f2fH/bWFcUABOM9nfiJkQ==",
"bundleDependencies": [
"@napi-rs/wasm-runtime",
"@emnapi/core",
@ -3422,21 +3401,81 @@
"license": "MIT",
"optional": true,
"dependencies": {
"@emnapi/core": "^1.4.3",
"@emnapi/runtime": "^1.4.3",
"@emnapi/wasi-threads": "^1.0.2",
"@napi-rs/wasm-runtime": "^0.2.9",
"@tybys/wasm-util": "^0.9.0",
"tslib": "^2.8.0"
"@emnapi/core": "^1.5.0",
"@emnapi/runtime": "^1.5.0",
"@emnapi/wasi-threads": "^1.1.0",
"@napi-rs/wasm-runtime": "^1.0.5",
"@tybys/wasm-util": "^0.10.1",
"tslib": "^2.4.0"
},
"engines": {
"node": ">=14.0.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": {
"version": "1.5.0",
"dev": true,
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"@emnapi/wasi-threads": "1.1.0",
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": {
"version": "1.5.0",
"dev": true,
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": {
"version": "1.1.0",
"dev": true,
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": {
"version": "1.0.5",
"dev": true,
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"@emnapi/core": "^1.5.0",
"@emnapi/runtime": "^1.5.0",
"@tybys/wasm-util": "^0.10.1"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": {
"version": "0.10.1",
"dev": true,
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": {
"version": "2.8.1",
"dev": true,
"inBundle": true,
"license": "0BSD",
"optional": true
},
"node_modules/@tailwindcss/oxide-win32-arm64-msvc": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.6.tgz",
"integrity": "sha512-nqpDWk0Xr8ELO/nfRUDjk1pc9wDJ3ObeDdNMHLaymc4PJBWj11gdPCWZFKSK2AVKjJQC7J2EfmSmf47GN7OuLg==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.14.tgz",
"integrity": "sha512-Az0RnnkcvRqsuoLH2Z4n3JfAef0wElgzHD5Aky/e+0tBUxUhIeIqFBTMNQvmMRSP15fWwmvjBxZ3Q8RhsDnxAA==",
"cpu": [
"arm64"
],
@ -3451,9 +3490,9 @@
}
},
"node_modules/@tailwindcss/oxide-win32-x64-msvc": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.6.tgz",
"integrity": "sha512-5k9xF33xkfKpo9wCvYcegQ21VwIBU1/qEbYlVukfEIyQbEA47uK8AAwS7NVjNE3vHzcmxMYwd0l6L4pPjjm1rQ==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.14.tgz",
"integrity": "sha512-ttblVGHgf68kEE4om1n/n44I0yGPkCPbLsqzjvybhpwa6mKKtgFfAzy6btc3HRmuW7nHe0OOrSeNP9sQmmH9XA==",
"cpu": [
"x64"
],
@ -3468,26 +3507,19 @@
}
},
"node_modules/@tailwindcss/postcss": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.6.tgz",
"integrity": "sha512-ELq+gDMBuRXPJlpE3PEen+1MhnHAQQrh2zF0dI1NXOlEWfr2qWf2CQdr5jl9yANv8RErQaQ2l6nIFO9OSCVq/g==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/@tailwindcss/postcss/-/postcss-4.1.14.tgz",
"integrity": "sha512-BdMjIxy7HUNThK87C7BC8I1rE8BVUsfNQSI5siQ4JK3iIa3w0XyVvVL9SXLWO//CtYTcp1v7zci0fYwJOjB+Zg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@alloc/quick-lru": "^5.2.0",
"@tailwindcss/node": "4.1.6",
"@tailwindcss/oxide": "4.1.6",
"@tailwindcss/node": "4.1.14",
"@tailwindcss/oxide": "4.1.14",
"postcss": "^8.4.41",
"tailwindcss": "4.1.6"
"tailwindcss": "4.1.14"
}
},
"node_modules/@tailwindcss/postcss/node_modules/tailwindcss": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
"integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
"dev": true,
"license": "MIT"
},
"node_modules/@testing-library/dom": {
"version": "10.4.1",
"resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz",
@ -3601,17 +3633,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/@tybys/wasm-util": {
"version": "0.9.0",
"resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.9.0.tgz",
"integrity": "sha512-6+7nlbMVX/PVDCwaIQ8nTOPveOcFLSt8GcXdx8hD0bt39uWxYT88uXzqTd4fTvqta7oeUJqudepapKNt2DYJFw==",
"dev": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@types/aria-query": {
"version": "5.0.4",
"resolved": "https://registry.npmjs.org/@types/aria-query/-/aria-query-5.0.4.tgz",
@ -3812,12 +3833,12 @@
"license": "MIT"
},
"node_modules/@types/node": {
"version": "24.3.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.0.tgz",
"integrity": "sha512-aPTXCrfwnDLj4VvXrm+UUCQjNEvJgNA8s5F1cvwQU+3KNltTOkBm1j30uNLyqqPNe7gE3KFzImYoZEfLhp4Yow==",
"version": "24.8.1",
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.8.1.tgz",
"integrity": "sha512-alv65KGRadQVfVcG69MuB4IzdYVpRwMG/mq8KWOaoOdyY617P5ivaDiMCGOFDWD2sAn5Q0mR3mRtUOgm99hL9Q==",
"license": "MIT",
"dependencies": {
"undici-types": "~7.10.0"
"undici-types": "~7.14.0"
}
},
"node_modules/@types/node-fetch": {
@ -5850,9 +5871,9 @@
"license": "MIT"
},
"node_modules/enhanced-resolve": {
"version": "5.18.1",
"resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.1.tgz",
"integrity": "sha512-ZSW3ma5GkcQBIpwZTSRAI8N71Uuwgs93IezB7mf7R60tC8ZbJideoDNKjHn2O9KIlx6rkGTTEk1xUCK2E1Y2Yg==",
"version": "5.18.3",
"resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.3.tgz",
"integrity": "sha512-d4lC8xfavMeBjzGr2vECC3fsGXziXZQyJxD868h2M/mBI3PwAuODxAkLkq5HYuvrPYcUtiLzsTo8U3PgX3Ocww==",
"dev": true,
"license": "MIT",
"dependencies": {
@ -9128,9 +9149,9 @@
}
},
"node_modules/jiti": {
"version": "2.4.2",
"resolved": "https://registry.npmjs.org/jiti/-/jiti-2.4.2.tgz",
"integrity": "sha512-rg9zJN+G4n2nfJl5MW3BMygZX56zKPNVEYYqq7adpmMh4Jn2QNEwhvQlFy6jPVdcod7txZtKHWnyZiA3a0zP7A==",
"version": "2.6.1",
"resolved": "https://registry.npmjs.org/jiti/-/jiti-2.6.1.tgz",
"integrity": "sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==",
"dev": true,
"license": "MIT",
"bin": {
@ -9368,9 +9389,9 @@
}
},
"node_modules/lightningcss": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.29.2.tgz",
"integrity": "sha512-6b6gd/RUXKaw5keVdSEtqFVdzWnU5jMxTUjA2bVcMNPLwSQ08Sv/UodBVtETLCn7k4S1Ibxwh7k68IwLZPgKaA==",
"version": "1.30.1",
"resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.30.1.tgz",
"integrity": "sha512-xi6IyHML+c9+Q3W0S4fCQJOym42pyurFiJUHEcEyHS0CeKzia4yZDEsLlqOFykxOdHpNy0NmvVO31vcSqAxJCg==",
"dev": true,
"license": "MPL-2.0",
"dependencies": {
@ -9384,22 +9405,22 @@
"url": "https://opencollective.com/parcel"
},
"optionalDependencies": {
"lightningcss-darwin-arm64": "1.29.2",
"lightningcss-darwin-x64": "1.29.2",
"lightningcss-freebsd-x64": "1.29.2",
"lightningcss-linux-arm-gnueabihf": "1.29.2",
"lightningcss-linux-arm64-gnu": "1.29.2",
"lightningcss-linux-arm64-musl": "1.29.2",
"lightningcss-linux-x64-gnu": "1.29.2",
"lightningcss-linux-x64-musl": "1.29.2",
"lightningcss-win32-arm64-msvc": "1.29.2",
"lightningcss-win32-x64-msvc": "1.29.2"
"lightningcss-darwin-arm64": "1.30.1",
"lightningcss-darwin-x64": "1.30.1",
"lightningcss-freebsd-x64": "1.30.1",
"lightningcss-linux-arm-gnueabihf": "1.30.1",
"lightningcss-linux-arm64-gnu": "1.30.1",
"lightningcss-linux-arm64-musl": "1.30.1",
"lightningcss-linux-x64-gnu": "1.30.1",
"lightningcss-linux-x64-musl": "1.30.1",
"lightningcss-win32-arm64-msvc": "1.30.1",
"lightningcss-win32-x64-msvc": "1.30.1"
}
},
"node_modules/lightningcss-darwin-arm64": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.29.2.tgz",
"integrity": "sha512-cK/eMabSViKn/PG8U/a7aCorpeKLMlK0bQeNHmdb7qUnBkNPnL+oV5DjJUo0kqWsJUapZsM4jCfYItbqBDvlcA==",
"version": "1.30.1",
"resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.30.1.tgz",
"integrity": "sha512-c8JK7hyE65X1MHMN+Viq9n11RRC7hgin3HhYKhrMyaXflk5GVplZ60IxyoVtzILeKr+xAJwg6zK6sjTBJ0FKYQ==",
"cpu": [
"arm64"
],
@ -9418,9 +9439,9 @@
}
},
"node_modules/lightningcss-darwin-x64": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.29.2.tgz",
"integrity": "sha512-j5qYxamyQw4kDXX5hnnCKMf3mLlHvG44f24Qyi2965/Ycz829MYqjrVg2H8BidybHBp9kom4D7DR5VqCKDXS0w==",
"version": "1.30.1",
"resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.30.1.tgz",
"integrity": "sha512-k1EvjakfumAQoTfcXUcHQZhSpLlkAuEkdMBsI/ivWw9hL+7FtilQc0Cy3hrx0AAQrVtQAbMI7YjCgYgvn37PzA==",
"cpu": [
"x64"
],
@ -9439,9 +9460,9 @@
}
},
"node_modules/lightningcss-freebsd-x64": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.29.2.tgz",
"integrity": "sha512-wDk7M2tM78Ii8ek9YjnY8MjV5f5JN2qNVO+/0BAGZRvXKtQrBC4/cn4ssQIpKIPP44YXw6gFdpUF+Ps+RGsCwg==",
"version": "1.30.1",
"resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.30.1.tgz",
"integrity": "sha512-kmW6UGCGg2PcyUE59K5r0kWfKPAVy4SltVeut+umLCFoJ53RdCUWxcRDzO1eTaxf/7Q2H7LTquFHPL5R+Gjyig==",
"cpu": [
"x64"
],
@ -9460,9 +9481,9 @@
}
},
"node_modules/lightningcss-linux-arm-gnueabihf": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.29.2.tgz",
"integrity": "sha512-IRUrOrAF2Z+KExdExe3Rz7NSTuuJ2HvCGlMKoquK5pjvo2JY4Rybr+NrKnq0U0hZnx5AnGsuFHjGnNT14w26sg==",
"version": "1.30.1",
"resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.30.1.tgz",
"integrity": "sha512-MjxUShl1v8pit+6D/zSPq9S9dQ2NPFSQwGvxBCYaBYLPlCWuPh9/t1MRS8iUaR8i+a6w7aps+B4N0S1TYP/R+Q==",
"cpu": [
"arm"
],
@ -9481,9 +9502,9 @@
}
},
"node_modules/lightningcss-linux-arm64-gnu": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.29.2.tgz",
"integrity": "sha512-KKCpOlmhdjvUTX/mBuaKemp0oeDIBBLFiU5Fnqxh1/DZ4JPZi4evEH7TKoSBFOSOV3J7iEmmBaw/8dpiUvRKlQ==",
"version": "1.30.1",
"resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.30.1.tgz",
"integrity": "sha512-gB72maP8rmrKsnKYy8XUuXi/4OctJiuQjcuqWNlJQ6jZiWqtPvqFziskH3hnajfvKB27ynbVCucKSm2rkQp4Bw==",
"cpu": [
"arm64"
],
@ -9502,9 +9523,9 @@
}
},
"node_modules/lightningcss-linux-arm64-musl": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.29.2.tgz",
"integrity": "sha512-Q64eM1bPlOOUgxFmoPUefqzY1yV3ctFPE6d/Vt7WzLW4rKTv7MyYNky+FWxRpLkNASTnKQUaiMJ87zNODIrrKQ==",
"version": "1.30.1",
"resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.30.1.tgz",
"integrity": "sha512-jmUQVx4331m6LIX+0wUhBbmMX7TCfjF5FoOH6SD1CttzuYlGNVpA7QnrmLxrsub43ClTINfGSYyHe2HWeLl5CQ==",
"cpu": [
"arm64"
],
@ -9523,9 +9544,9 @@
}
},
"node_modules/lightningcss-linux-x64-gnu": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.29.2.tgz",
"integrity": "sha512-0v6idDCPG6epLXtBH/RPkHvYx74CVziHo6TMYga8O2EiQApnUPZsbR9nFNrg2cgBzk1AYqEd95TlrsL7nYABQg==",
"version": "1.30.1",
"resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.30.1.tgz",
"integrity": "sha512-piWx3z4wN8J8z3+O5kO74+yr6ze/dKmPnI7vLqfSqI8bccaTGY5xiSGVIJBDd5K5BHlvVLpUB3S2YCfelyJ1bw==",
"cpu": [
"x64"
],
@ -9544,9 +9565,9 @@
}
},
"node_modules/lightningcss-linux-x64-musl": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.29.2.tgz",
"integrity": "sha512-rMpz2yawkgGT8RULc5S4WiZopVMOFWjiItBT7aSfDX4NQav6M44rhn5hjtkKzB+wMTRlLLqxkeYEtQ3dd9696w==",
"version": "1.30.1",
"resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.30.1.tgz",
"integrity": "sha512-rRomAK7eIkL+tHY0YPxbc5Dra2gXlI63HL+v1Pdi1a3sC+tJTcFrHX+E86sulgAXeI7rSzDYhPSeHHjqFhqfeQ==",
"cpu": [
"x64"
],
@ -9565,9 +9586,9 @@
}
},
"node_modules/lightningcss-win32-arm64-msvc": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.29.2.tgz",
"integrity": "sha512-nL7zRW6evGQqYVu/bKGK+zShyz8OVzsCotFgc7judbt6wnB2KbiKKJwBE4SGoDBQ1O94RjW4asrCjQL4i8Fhbw==",
"version": "1.30.1",
"resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.30.1.tgz",
"integrity": "sha512-mSL4rqPi4iXq5YVqzSsJgMVFENoa4nGTT/GjO2c0Yl9OuQfPsIfncvLrEW6RbbB24WtZ3xP/2CCmI3tNkNV4oA==",
"cpu": [
"arm64"
],
@ -9586,9 +9607,9 @@
}
},
"node_modules/lightningcss-win32-x64-msvc": {
"version": "1.29.2",
"resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.29.2.tgz",
"integrity": "sha512-EdIUW3B2vLuHmv7urfzMI/h2fmlnOQBk1xlsDxkN1tCWKjNFjfLhGxYk8C8mzpSfr+A6jFFIi8fU6LbQGsRWjA==",
"version": "1.30.1",
"resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.30.1.tgz",
"integrity": "sha512-PVqXh48wh4T53F/1CCu8PIPCxLzWyCnn/9T5W1Jpmdy5h9Cwd+0YQS6/LwhHXSafuc61/xg9Lv5OrCby6a++jg==",
"cpu": [
"x64"
],
@ -9614,9 +9635,9 @@
"license": "MIT"
},
"node_modules/llama-stack-client": {
"version": "0.2.23",
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.23.tgz",
"integrity": "sha512-J3YFH1HW2K70capejQxGlCyTgKdfx+sQf8Ab+HFi1j2Q00KtpHXB79RxejvBxjWC3X2E++P9iU57KdU2Tp/rIQ==",
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.3.0.tgz",
"integrity": "sha512-76K/t1doaGmlBbDxCADaral9Vccvys9P8pqAMIhwBhMAqWudCEORrMMhUSg+pjhamWmEKj3wa++d4zeOGbfN/w==",
"license": "MIT",
"dependencies": {
"@types/node": "^18.11.18",
@ -9726,13 +9747,13 @@
}
},
"node_modules/magic-string": {
"version": "0.30.17",
"resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.17.tgz",
"integrity": "sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA==",
"version": "0.30.19",
"resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.19.tgz",
"integrity": "sha512-2N21sPY9Ws53PZvsEpVtNuSW+ScYbQdp4b9qUaL+9QkHUrGFKo56Lg9Emg5s9V/qrtNBmiR01sYhUOwu3H+VOw==",
"dev": true,
"license": "MIT",
"dependencies": {
"@jridgewell/sourcemap-codec": "^1.5.0"
"@jridgewell/sourcemap-codec": "^1.5.5"
}
},
"node_modules/make-dir": {
@ -10717,9 +10738,9 @@
}
},
"node_modules/minizlib": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.2.tgz",
"integrity": "sha512-oG62iEk+CYt5Xj2YqI5Xi9xWUeZhDI8jjQmC5oThVH5JGCTgIjr7ciJDzC7MBzYd//WvR1OTmP5Q38Q8ShQtVA==",
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz",
"integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==",
"dev": true,
"license": "MIT",
"dependencies": {
@ -10729,22 +10750,6 @@
"node": ">= 18"
}
},
"node_modules/mkdirp": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
"integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
"dev": true,
"license": "MIT",
"bin": {
"mkdirp": "dist/cjs/src/bin.js"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/motion-dom": {
"version": "12.23.23",
"resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.23.23.tgz",
@ -12989,34 +12994,37 @@
}
},
"node_modules/tailwindcss": {
"version": "4.1.13",
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.13.tgz",
"integrity": "sha512-i+zidfmTqtwquj4hMEwdjshYYgMbOrPzb9a0M3ZgNa0JMoZeFC6bxZvO8yr8ozS6ix2SDz0+mvryPeBs2TFE+w==",
"version": "4.1.14",
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.14.tgz",
"integrity": "sha512-b7pCxjGO98LnxVkKjaZSDeNuljC4ueKUddjENJOADtubtdo8llTaJy7HwBMeLNSSo2N5QIAgklslK1+Ir8r6CA==",
"dev": true,
"license": "MIT"
},
"node_modules/tapable": {
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz",
"integrity": "sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==",
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/tapable/-/tapable-2.3.0.tgz",
"integrity": "sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=6"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/webpack"
}
},
"node_modules/tar": {
"version": "7.4.3",
"resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz",
"integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==",
"version": "7.5.1",
"resolved": "https://registry.npmjs.org/tar/-/tar-7.5.1.tgz",
"integrity": "sha512-nlGpxf+hv0v7GkWBK2V9spgactGOp0qvfWRxUMjqHyzrt3SgwE48DIv/FhqPHJYLHpgW1opq3nERbz5Anq7n1g==",
"dev": true,
"license": "ISC",
"dependencies": {
"@isaacs/fs-minipass": "^4.0.0",
"chownr": "^3.0.0",
"minipass": "^7.1.2",
"minizlib": "^3.0.1",
"mkdirp": "^3.0.1",
"minizlib": "^3.1.0",
"yallist": "^5.0.0"
},
"engines": {
@ -13418,9 +13426,9 @@
}
},
"node_modules/undici-types": {
"version": "7.10.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz",
"integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==",
"version": "7.14.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.14.0.tgz",
"integrity": "sha512-QQiYxHuyZ9gQUIrmPo3IA+hUl4KYk8uSA7cHrcKd/l3p1OTpZcM0Tbp9x7FAtXdAYhlasd60ncPpgu6ihG6TOA==",
"license": "MIT"
},
"node_modules/unified": {


@ -23,7 +23,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^12.23.24",
"llama-stack-client": "^0.2.23",
"llama-stack-client": "^0.3.0",
"lucide-react": "^0.545.0",
"next": "15.5.4",
"next-auth": "^4.24.11",


@ -7,7 +7,7 @@ required-version = ">=0.7.0"
[project]
name = "llama_stack"
version = "0.3.0rc4"
version = "0.3.0"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
@ -30,7 +30,7 @@ dependencies = [
"httpx",
"jinja2>=3.1.6",
"jsonschema",
"llama-stack-client>=0.3.0rc4",
"llama-stack-client>=0.3.0",
"openai>=1.107", # for expires_after support
"prompt-toolkit",
"python-dotenv",
@ -55,7 +55,7 @@ dependencies = [
ui = [
"streamlit",
"pandas",
"llama-stack-client>=0.3.0rc4",
"llama-stack-client>=0.3.0",
"streamlit-option-menu",
]


@ -156,6 +156,16 @@ DISTRO=$(echo "$DISTRO" | sed 's/^docker://')
CONTAINER_NAME="llama-stack-test-$DISTRO"
should_copy_source() {
if [[ "$USE_COPY_NOT_MOUNT" == "true" ]]; then
return 0
fi
if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
return 0
fi
return 1
}
# Function to check if container is running
is_container_running() {
docker ps --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"
@ -183,20 +193,29 @@ stop_container() {
build_image() {
echo "=== Building Docker Image for distribution: $DISTRO ==="
# Get the repo root (parent of scripts directory)
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
local script_dir
script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
local repo_root
repo_root=$(cd "$script_dir/.." && pwd)
# Determine whether to copy or mount source
# Copy in CI or if explicitly requested, otherwise mount for live development
BUILD_ENV="LLAMA_STACK_DIR=$REPO_ROOT"
if [[ "$USE_COPY_NOT_MOUNT" == "true" ]] || [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
echo "Copying source into image (USE_COPY_NOT_MOUNT=true, CI=${CI:-false}, GITHUB_ACTIONS=${GITHUB_ACTIONS:-false})"
BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV"
else
echo "Will mount source for live development"
local containerfile="$repo_root/containers/Containerfile"
if [[ ! -f "$containerfile" ]]; then
echo "❌ Containerfile not found at $containerfile"
exit 1
fi
if ! eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"; then
local build_cmd=(
docker
build
"$repo_root"
-f "$containerfile"
--tag "localhost/distribution-$DISTRO:dev"
--build-arg "DISTRO_NAME=$DISTRO"
--build-arg "INSTALL_MODE=editable"
--build-arg "LLAMA_STACK_DIR=/workspace"
)
if ! "${build_cmd[@]}"; then
echo "❌ Failed to build Docker image"
exit 1
fi
@ -224,7 +243,7 @@ start_container() {
# Check if image exists (with or without localhost/ prefix)
if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "distribution-$DISTRO:dev$"; then
echo "❌ Error: Image distribution-$DISTRO:dev does not exist"
echo "Either build it first without --no-rebuild, or run: llama stack build --distro $DISTRO --image-type container"
echo "Either build it first without --no-rebuild, or run: docker build . -f containers/Containerfile --build-arg DISTRO_NAME=$DISTRO --tag localhost/distribution-$DISTRO:dev"
exit 1
fi
echo "✅ Found existing image for distribution-$DISTRO:dev"
@ -236,8 +255,10 @@ start_container() {
echo "=== Starting Docker Container ==="
# Get the repo root for volume mount
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
local script_dir
script_dir=$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)
local repo_root
repo_root=$(cd "$script_dir/.." && pwd)
# Determine the actual image name (may have localhost/ prefix)
IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
@ -279,10 +300,18 @@ start_container() {
NETWORK_MODE="--network host"
fi
local source_mount=""
if should_copy_source; then
echo "Source baked into image (no volume mount)"
else
source_mount="-v \"$repo_root\":/workspace"
echo "Mounting $repo_root into /workspace"
fi
docker run -d $NETWORK_MODE --name "$CONTAINER_NAME" \
-p $PORT:$PORT \
$DOCKER_ENV_VARS \
-v "$REPO_ROOT":/app/llama-stack-source \
$source_mount \
"$IMAGE_NAME" \
--port $PORT


@ -238,6 +238,8 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
echo "Stopping Docker container..."
container_name="llama-stack-test-$DISTRO"
if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then
echo "Dumping container logs before stopping..."
docker logs "$container_name" > "docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true
echo "Stopping and removing container: $container_name"
docker stop "$container_name" 2>/dev/null || true
docker rm "$container_name" 2>/dev/null || true
@ -252,19 +254,24 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
export LLAMA_STACK_PORT=8321
echo "=== Building Docker Image for distribution: $DISTRO ==="
# Set LLAMA_STACK_DIR to repo root
# USE_COPY_NOT_MOUNT copies files into image (for CI), otherwise mounts for live development
BUILD_ENV="LLAMA_STACK_DIR=$ROOT_DIR"
if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
echo "CI detected (CI=$CI, GITHUB_ACTIONS=$GITHUB_ACTIONS): copying source into image"
BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV"
else
echo "Local mode: will mount source for live development"
containerfile="$ROOT_DIR/containers/Containerfile"
if [[ ! -f "$containerfile" ]]; then
echo "❌ Containerfile not found at $containerfile"
exit 1
fi
eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"
build_cmd=(
docker
build
"$ROOT_DIR"
-f "$containerfile"
--tag "localhost/distribution-$DISTRO:dev"
--build-arg "DISTRO_NAME=$DISTRO"
--build-arg "INSTALL_MODE=editable"
--build-arg "LLAMA_STACK_DIR=/workspace"
)
if [ $? -ne 0 ]; then
if ! "${build_cmd[@]}"; then
echo "❌ Failed to build Docker image"
exit 1
fi
@ -304,7 +311,6 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
docker run -d --network host --name "$container_name" \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
$DOCKER_ENV_VARS \
-v $ROOT_DIR:/app/llama-stack-source \
"$IMAGE_NAME" \
--port $LLAMA_STACK_PORT
@ -404,6 +410,21 @@ elif [ $exit_code -eq 5 ]; then
echo "⚠️ No tests collected (pattern matched no tests)"
else
echo "❌ Tests failed"
echo ""
echo "=== Dumping last 100 lines of logs for debugging ==="
# Output server or container logs based on stack config
if [[ "$STACK_CONFIG" == *"server:"* && -f "server.log" ]]; then
echo "--- Last 100 lines of server.log ---"
tail -100 server.log
elif [[ "$STACK_CONFIG" == *"docker:"* ]]; then
docker_log_file="docker-${DISTRO}-${INFERENCE_MODE}.log"
if [[ -f "$docker_log_file" ]]; then
echo "--- Last 100 lines of $docker_log_file ---"
tail -100 "$docker_log_file"
fi
fi
exit 1
fi


@ -5,6 +5,7 @@
# the root directory of this source tree.
import inspect
import itertools
import logging # allow-direct-logging
import os
import tempfile
import textwrap
@ -37,6 +38,9 @@ def pytest_sessionstart(session):
if "LLAMA_STACK_TEST_INFERENCE_MODE" not in os.environ:
os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay"
if "LLAMA_STACK_LOGGING" not in os.environ:
os.environ["LLAMA_STACK_LOGGING"] = "all=warning"
if "SQLITE_STORE_DIR" not in os.environ:
os.environ["SQLITE_STORE_DIR"] = tempfile.mkdtemp()
@ -54,6 +58,12 @@ def pytest_sessionstart(session):
patch_httpx_for_test_id()
@pytest.fixture(autouse=True)
def suppress_httpx_logs(caplog):
"""Suppress httpx INFO logs for all integration tests"""
caplog.set_level(logging.WARNING, logger="httpx")
@pytest.fixture(autouse=True)
def _track_test_context(request):
"""Automatically track current test context for isolated recordings.


@ -40,7 +40,7 @@ def is_port_available(port: int, host: str = "localhost") -> bool:
def start_llama_stack_server(config_name: str) -> subprocess.Popen:
"""Start a llama stack server with the given config."""
cmd = f"uv run --with llama-stack llama stack build --distro {config_name} --image-type venv --run"
cmd = f"uv run llama stack run {config_name}"
devnull = open(os.devnull, "w")
process = subprocess.Popen(
shlex.split(cmd),

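The recording below replays a streamed chat completion in which the knowledge_search tool call arrives piecewise: the call id and function name come in the first chunk, then the JSON arguments trickle in as string fragments ('{"', 'query', '":"', ...). A minimal client-side reassembly sketch (standard OpenAI SDK streaming; credentials are illustrative, and the tool schema echoes the recorded request):

```python
from openai import OpenAI

client = OpenAI()  # credentials illustrative

tools = [{
    "type": "function",
    "function": {
        "name": "knowledge_search",
        "description": "Search for information in a database.",
        "parameters": {
            "type": "object",
            "properties": {"query": {"type": "string"}},
            "required": ["query"],
        },
    },
}]

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "How many experts does the Llama 4 Maverick model have?"}],
    tools=tools,
    stream=True,
    stream_options={"include_usage": True},
)

name, args = None, []
for chunk in stream:
    if not chunk.choices:  # the final usage-only chunk carries no choices
        continue
    for tc in chunk.choices[0].delta.tool_calls or []:
        if not tc.function:
            continue
        if tc.function.name:
            name = tc.function.name             # sent once, in the first chunk
        if tc.function.arguments:
            args.append(tc.function.arguments)  # fragments: '{"', 'query', '":"', ...
print(name, "".join(args))
```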

@ -0,0 +1,660 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts_pdf]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_M8gyYiB39MwYdJKc4aHIGbfA",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "8anw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "99M"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "I"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "ptMnH"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Ue"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Euqoc"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "zdLoy"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Dq4"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " number",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "nAZItljCEdlp1VF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " of",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "9VZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " experts",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "JqBWo7hfEH8khh"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "hls"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "KHtw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-00913934356f",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 24,
"prompt_tokens": 74,
"total_tokens": 98,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "94SeGMt8IytX52R"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
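In the stream above, the `knowledge_search` call arrives one `arguments` fragment per chunk (`{"`, `query`, `":"`, `L`, `lama`, ...), so a client has to concatenate `delta.tool_calls[i].function.arguments` across chunks before the JSON can be parsed. A minimal sketch over the parsed `response.body` list from a recording like this one (a hypothetical helper, not part of the recording harness):

```python
import json

def assemble_tool_calls(chunks: list[dict]) -> list[dict]:
    """Accumulate streamed tool-call deltas into complete calls."""
    calls: dict[int, dict] = {}
    for chunk in chunks:
        for choice in chunk["__data__"]["choices"]:
            # tool_calls is null on content-only and terminal chunks
            for tc in choice["delta"].get("tool_calls") or []:
                slot = calls.setdefault(
                    tc["index"], {"id": None, "name": None, "arguments": ""}
                )
                # id and name arrive only on the first delta; later deltas carry null
                if tc.get("id"):
                    slot["id"] = tc["id"]
                if tc["function"].get("name"):
                    slot["name"] = tc["function"]["name"]
                slot["arguments"] += tc["function"].get("arguments") or ""
    # Parse the concatenated fragments once the stream is complete
    return [dict(c, arguments=json.loads(c["arguments"])) for c in calls.values()]
```

For this recording the sketch would yield a single call: id `call_M8gyYiB39MwYdJKc4aHIGbfA`, name `knowledge_search`, arguments `{"query": "Llama 4 Maverick model number of experts"}`.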

@@ -0,0 +1,668 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-528246887823, score: 0.01927231682811354, attributes: {'filename': 'test_sequential_file_search.txt', 'document_id': 'file-528246887823', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-528246887823|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
},
{
"role": "assistant",
"content": "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture <|file-528246887823|>."
},
{
"role": "user",
"content": "Can you tell me more about the architecture?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_rmMfpryevUEhVly9yXhlsVG0",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "34Bp"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "ZFY"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "w"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "B"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "tJ1px"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "mJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "E6ImC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "h3NL0"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "18C"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " architecture",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "l4ejZ7nxo"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " details",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "1i292QtpWi65ci"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "EWU"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "hNDG"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1489591da67e",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 377,
"total_tokens": 400,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "TajyNPPmLXMy1"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
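The request body above shows the shape a sequential file-search turn takes: the assistant's earlier tool call and the matching `tool` result are replayed in `messages` before the follow-up user question, which prompts a second `knowledge_search` call with a refined query. A sketch of appending that pair between turns, mirroring the message shapes in the recording (hypothetical loop code, not the test harness):

```python
import json

def append_tool_turn(messages: list[dict], call: dict, tool_output_text: str) -> None:
    """Record one completed tool call in the conversation history.

    `call` is an assembled call ({"id", "name", "arguments"}); the shapes
    below mirror the request body in the recording above.
    """
    messages.append({
        "role": "assistant",
        "content": "",
        "tool_calls": [{
            "index": 0,
            "id": call["id"],
            "type": "function",
            "function": {
                "name": call["name"],
                "arguments": json.dumps(call["arguments"]),
            },
        }],
    })
    messages.append({
        "role": "tool",
        "tool_call_id": call["id"],
        # Tool output is a list of text parts, as in the recording
        "content": [{"type": "text", "text": tool_output_text}],
    })
```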

@@ -0,0 +1,763 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_UKFNZA0eSkL6fZHbs8ygBd5W",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_UKFNZA0eSkL6fZHbs8ygBd5W",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-861837565219, score: 0.015252756439527764, attributes: {'filename': 'test_response_non_streaming_file_search.txt', 'document_id': 'file-861837565219', 'token_count': 10.0, 'metadata_token_count': 13.0} (cite as <|file-861837565219|>)\nLlama 4 Maverick has 128 experts\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "t286ibBdBtIrgr"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "U9mPebT93jaD6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "JJ5Xp0JfYMykC6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "oxaRGhfy0xhA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "DSR2mw4mJJpT9TQ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "QR8qbwLCtyBsNPE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "lb79bMu1aH"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "siOOzlWO8l75q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "7azbsqKmkb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "AGmv3xp6PWIL"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "fZjTPPdOZO2fpsH"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "7jbYm9zpbFY8u"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "GKSrKriL"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "U9Rqtd9YhQ2ARK"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "2HSPrSu1pk90PIF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "OPPb0thERVPI"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "dLA839tLWqtODM5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "861",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "sRR5OHXHSzinH"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "837",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "r3aqOeXUsyjjZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "565",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "gsi9YE8xBivSU"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "219",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "zGSdQm7vspYCY"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "m8Wm3YWMwlQ8zl5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "lCLlH34nfjTsEB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "rTGBImqnxH"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-15f4d677d61a",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 326,
"total_tokens": 349,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "HWpBZq9m6jgEu"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
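Unlike the tool-call streams, this recording streams the final answer itself, and the citation marker `<|file-861837565219|>` is split across several content deltas (` <`, `|`, `file`, `-`, `861`, ...), so citations can only be extracted after the text is reassembled. A sketch under the same assumed chunk shape as above (not a harness API):

```python
import re

def collect_answer(chunks: list[dict]) -> tuple[str, list[str]]:
    """Join streamed content deltas, then pull out <|file-id|> citations."""
    text = "".join(
        choice["delta"].get("content") or ""  # content is null on tool/terminal chunks
        for chunk in chunks
        for choice in chunk["__data__"]["choices"]
    )
    return text, re.findall(r"<\|(file-[^|>]+)\|>", text)
```

For this stream the sketch returns `'The Llama 4 Maverick model has 128 experts <|file-861837565219|>.'` with one citation, `file-861837565219`.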

@@ -0,0 +1,925 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-528246887823, score: 0.01927231682811354, attributes: {'filename': 'test_sequential_file_search.txt', 'document_id': 'file-528246887823', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-528246887823|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "cNAmgzguPKxzrT"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "EA42hQaqSv6Dl"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "BaXUa7hVjk5nx5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Qtlt9e2mKNnd"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "4uMDzeUHI2m6BqK"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "wAiaIwGRcePCkSP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "9wrgxL4SLF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "3LAZNfrT4P56F"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "FQnWVVG3uk"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "QBV750ljlBuw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "CvHO7IpkYl1t6WT"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "t9yXtBtcAN2Ym"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "nYeTyozH"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "l5TEj9wVsDVVa"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " its",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "eqM279ge3tlP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " mixture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "adEY1TL0"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "AQUpfvABqhFSw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "CW4Mdlfb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " architecture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "0VM"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "lVhduxzervQNW2"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "jLw8pSzPx1Fpvdu"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "jNRy9leoYXdz"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "T0aCg9vGAgtyyqX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "528",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "pecAt4HBqHqgq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "246",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "HLmt7RtQ5V2Ad"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "887",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "nayq4hw8P8k8X"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "823",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "PONv6LbvqDB6n"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "OOocdPvPE66JCLe"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "IoRrfOtEJevEyh"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "3v8gEWZUDD"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-354405fd4255",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 29,
"prompt_tokens": 332,
"total_tokens": 361,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "Vn7WFfgLy7CQc"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
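Because these requests set `stream_options: {"include_usage": true}`, each stream ends with a chunk whose `choices` list is empty and whose `usage` block carries the token counts; in the recording above that terminal block reports 332 prompt + 29 completion = 361 total tokens. A sketch for pulling it out (assumed recording shape, not a harness API):

```python
def final_usage(chunks: list[dict]) -> dict | None:
    """Return token counts from the terminal usage-only chunk, if any."""
    last = chunks[-1]["__data__"]
    return last["usage"] if not last["choices"] and last.get("usage") else None
```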

@@ -0,0 +1,660 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_FzhOmTdZThRndI5rSASPdAqr",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "3cjx"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "7fk"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "S"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "G"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "OdWuY"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "l2"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "KaloW"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "HzeTK"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "ceO"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " number",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "WI17fZQOl0jugmg"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " of",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "rQq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " experts",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "SOIKMbAKjFhXxq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "gbX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "bZLJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-3aa2233e2147",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 24,
"prompt_tokens": 74,
"total_tokens": 98,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "R0oA3PanLpARhLY"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
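The chunks in the recording above stream the `knowledge_search` tool call one argument fragment at a time (`{"`, `query`, `":"`, `L`, `lama`, ..., `"}`). A minimal sketch of how a client typically reassembles those fragments, assuming the standard `openai` Python SDK and an `OPENAI_API_KEY` in the environment; variable names are illustrative, not part of the fixtures:

```python
# Sketch: accumulate streamed tool-call argument deltas as captured in the
# recording above. Assumes the standard `openai` SDK; names are illustrative.
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "How many experts does the Llama 4 Maverick model have?"}],
    tools=[{
        "type": "function",
        "function": {
            "name": "knowledge_search",
            "description": "Search for information in a database.",
            "parameters": {
                "type": "object",
                "properties": {"query": {"type": "string"}},
                "required": ["query"],
            },
        },
    }],
    stream=True,
    stream_options={"include_usage": True},  # terminal chunk carries usage and has empty choices
)

tool_args: dict[int, str] = {}  # tool_call index -> accumulated argument string
for chunk in stream:
    if not chunk.choices:  # skip the usage-only terminal chunk
        continue
    for tc in chunk.choices[0].delta.tool_calls or []:
        if tc.function and tc.function.arguments:
            tool_args[tc.index] = tool_args.get(tc.index, "") + tc.function.arguments

# tool_args[0] == '{"query":"Llama 4 Maverick model number of experts"}'
```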


@@ -0,0 +1,660 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_UKFNZA0eSkL6fZHbs8ygBd5W",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "ZEDA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "FWy"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "2"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "A"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "eg5pY"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "i3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "MIUQE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "PcKai"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "rZW"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " number",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "5efBkaSneq3MYiM"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " of",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Xzt"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " experts",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "VAZ5PINoNf5uLg"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "5kI"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "BvPC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-436039b9bd78",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 24,
"prompt_tokens": 74,
"total_tokens": 98,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "967FbyvY0943LNF"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}


@@ -0,0 +1,586 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts_pdf]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_YkpKfL9mwbGk2BLqNDoaFrO0",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "1mfS"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "d4b"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "l"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "2"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "JRCsr"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "jx"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "QMJkY"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "scjuN"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Ocj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " experts",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "mo3wLXjk9CANvH"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "byy"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "8nAM"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-43f424df929f",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 22,
"prompt_tokens": 74,
"total_tokens": 96,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "JFn10T2PhGSmfeW"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}


@@ -0,0 +1,668 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_FzhOmTdZThRndI5rSASPdAqr",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_FzhOmTdZThRndI5rSASPdAqr",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-797509666839, score: 0.01927231682811354, attributes: {'filename': 'test_sequential_file_search.txt', 'document_id': 'file-797509666839', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-797509666839|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
},
{
"role": "assistant",
"content": "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture <|file-797509666839|>."
},
{
"role": "user",
"content": "Can you tell me more about the architecture?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_0ABjXmEaManGBvCYVZD4QgMt",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "4WCi"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "xZw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "v"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "z"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "pxc1l"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "rw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "1uQsI"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "3GUgc"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "iSJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " architecture",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "PBkDU4h5O"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " details",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "FhjYMfvqXEQemh"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "bJM"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": null,
"obfuscation": "SZTO"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-5c9b18529d94",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_65564d8ba5",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 377,
"total_tokens": 400,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "aghfgDFthkUtG"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}


@@ -0,0 +1,925 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_FzhOmTdZThRndI5rSASPdAqr",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_FzhOmTdZThRndI5rSASPdAqr",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-797509666839, score: 0.01927231682811354, attributes: {'filename': 'test_sequential_file_search.txt', 'document_id': 'file-797509666839', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-797509666839|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "vt3GstRZ0aEOtQ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "V8LCLbwuWE6zL"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "AE5LGiL5P2vkSi"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "URAGF9HPepld"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "YhQyLMSQQBttHSZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "MTXfd73x6CxT4jC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "FRyNMsTqpf"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "pEvb1BhfCzIu3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "ti8cCbCXCN"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "ePgYRhIsTnxM"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Er6jdeAhYhP5yZo"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "kKbskZOW0nnLX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "9EkfQIUH"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "s9t2OgQMO8lpH"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " its",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Y5meY2O4Ow06"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " mixture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "d8vzlOjF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "BZ0ZP2N8lYwx9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "GLsfkS8o"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " architecture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "3le"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "6yIln93VE6CXZc"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "12h0ytbne30ZcjD"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "0EYqrf9KJNnT"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "9ZNIDg7zELT5q61"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "797",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "IGDExcadNKQ6z"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "509",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "36Ncp9snNGK23"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "666",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "oVuXPzQ3kvZIK"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "839",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "ZIqozh3gNXBtg"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "j1xdaBLDmWec5Fo"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "057ZL7KuQlBsN5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Mizb7MVtEE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-7437ac7a1deb",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 29,
"prompt_tokens": 332,
"total_tokens": 361,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "p2sRYaAtwLWB8"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
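In the recording above, the final answer and its `<|file-id|>` citation arrive as plain content deltas rather than tool-call fragments. A hedged sketch of reassembling them and extracting the citations; the regex is an assumption based on the citation format described in the recorded tool message:

```python
# Sketch: join the content deltas from the recording above and pull out the
# <|file-id|> citations the prompt instructs the model to emit. The regex is
# an assumption inferred from the citation format shown in the recorded messages.
import re

deltas = ["The", " L", "lama", " ", "4", " Maver", "ick", " model", " has", " ",
          "128", " experts", " in", " its", " mixture", " of", " experts",
          " architecture", " <", "|", "file", "-", "797", "509", "666", "839", "|", ">."]

text = "".join(deltas)
citations = re.findall(r"<\|(file-[A-Za-z0-9]+)\|>", text)

print(text)       # ... has 128 experts in its mixture of experts architecture <|file-797509666839|>.
print(citations)  # ['file-797509666839']
```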


@@ -0,0 +1,660 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "KVv4"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "07x"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "8"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "WQVqQ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "b5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "5cG73"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "0TZwr"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "70V"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " number",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "tgfI6t9sl0qk9lj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " of",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "bb4"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " experts",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "YwrLoyA0SH8QKR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "r2N"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "UcRX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-a37a1c209697",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 24,
"prompt_tokens": 74,
"total_tokens": 98,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "Z9OHDBA6RWfdcqi"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
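
In the recording above, the tool-call arguments arrive as string fragments (`{"`, `query`, `":"`, `L`, `lama`, ...) that only parse as JSON once concatenated in order. A sketch of the usual client-side accumulation, assuming `chunks` is the typed list rebuilt in the earlier sketch:

```python
import json

# Accumulate streamed tool-call fragments by index; the id and function name
# arrive once (first chunk), the arguments arrive as ordered string pieces.
calls: dict[int, dict] = {}
for chunk in chunks:
    for choice in chunk.choices:
        for tc in choice.delta.tool_calls or []:
            call = calls.setdefault(tc.index, {"id": None, "name": None, "arguments": ""})
            if tc.id:
                call["id"] = tc.id
            if tc.function and tc.function.name:
                call["name"] = tc.function.name
            if tc.function and tc.function.arguments:
                call["arguments"] += tc.function.arguments

for call in calls.values():
    print(call["name"], json.loads(call["arguments"]))
    # e.g. knowledge_search {'query': 'Llama 4 Maverick model number of experts'}
```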

@@ -0,0 +1,763 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_gZXRKN1HMDC16NP9wNPAkP9K",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model experts count\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_gZXRKN1HMDC16NP9wNPAkP9K",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-864460993305, score: 0.011418757438261288, attributes: {'filename': 'test_response_non_streaming_file_search.txt', 'document_id': 'file-864460993305', 'token_count': 10.0, 'metadata_token_count': 13.0} (cite as <|file-864460993305|>)\nLlama 4 Maverick has 128 experts\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model experts count\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "mdcAUKOPQatFDX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "SXcyzIHM4JAb9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "tIP37AbLRJtgw3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "T6hl8O1yALrY"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "EXmrHS7V452DM8U"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "WtvMno5JF3BbJja"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "wasA4Ibq0N"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "zC5PHOZiqE8hV"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "TCLUWLK9fl"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "p1X5NlNo8vvP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "B4xarQa3WvzzZke"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "BEDzpnch9VTi5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "NbeiTxmO"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "oEzJnEJlvJlvVB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "COTg5EQAvBoF1X4"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "agnuCzlpfNMe"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "6VItAaWcjjp8PCq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "864",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "IG901zUD4iD52"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "460",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "dTfo3F1G4iNgN"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "993",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "1hexBY3sKrN92"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "305",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "z2dp3INsqrUxD"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Dt8q83oqrxwR8j6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "ALKi5zh4iadh4W"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "j7ONgCjwww"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b088ac3381c3",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 324,
"total_tokens": 347,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "frqCbYK1PAWId"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
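
Here the streamed deltas assemble into an answer that ends with an inline citation in the `<|file-id|>` format the tool message asked for. A sketch of recovering both the final text and the cited file IDs from the rebuilt `chunks`; the regex is an assumption matching the format shown, not part of the test suite:

```python
import re

# Join the streamed content deltas into the final assistant text.
text = "".join(
    choice.delta.content or ""
    for chunk in chunks
    for choice in chunk.choices
)
# e.g. "The Llama 4 Maverick model has 128 experts <|file-864460993305|>."

# Extract citation markers of the form <|file-...|>.
file_ids = re.findall(r"<\|(file-[^|>]+)\|>", text)
print(file_ids)  # ['file-864460993305']
```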

@@ -0,0 +1,623 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_gZXRKN1HMDC16NP9wNPAkP9K",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "iVfJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "F4s"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "n"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "m"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Q90JJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "e6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "RVjmv"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "fxip1"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "APw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " experts",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "SGxrR0wH4r9xmj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " count",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "DPW"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "pWLu"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b2d4b49b6d35",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 74,
"total_tokens": 97,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "AQxCe0R2ppw6hGr"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

@@ -0,0 +1,705 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search_empty_vector_store[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_ltsd3q9G7fq4by5VmgdvtNRX",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_ltsd3q9G7fq4by5VmgdvtNRX",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 0 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query.\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "0fgBVqnoZphRrO"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": "I",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "kAndkfubV6NKXsY"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " couldn't",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "EXhAPDe"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " find",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "nrXymFqvjdq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " specific",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "lajtruZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " information",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "xasK"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " on",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "mrqUvzsWBTOO8"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "ha2tJcKUNTiA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " number",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "vpCsuweOe"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "GFFxLjDCZduzC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Xs8Vo94R"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "qLbADKniURbG3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "YF79Ocjj7FyP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "pyuKZULeLEPvik"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "UKwrHwSz4E7a"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "GRGchsnIuihqbZ0"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "Qpq01eD86BDpBoj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "ZbaWcRhys3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "srlQAcwr3TFz2"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "OaURzdjGvn"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "I81L6v0mjvIsSpW"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": null,
"obfuscation": "eCBzPYMI2j"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b43291ac9074",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_f64f290af2",
"usage": {
"completion_tokens": 21,
"prompt_tokens": 163,
"total_tokens": 184,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "hPWaHWTF4MLy7"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
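
Because every request here sets `stream_options.include_usage`, the last chunk of each recording carries the token accounting and an empty `choices` list. A sketch of reading it off the rebuilt `chunks`:

```python
# The usage-bearing chunk is the final one; its choices list is empty.
final = chunks[-1]
assert final.choices == [] and final.usage is not None

usage = final.usage
print(f"prompt={usage.prompt_tokens} "
      f"completion={usage.completion_tokens} "
      f"total={usage.total_tokens}")
# e.g. prompt=163 completion=21 total=184 for the recording above
```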

Some files were not shown because too many files have changed in this diff.