From ae74b31ae36ff13f92bf18e472860510f948c845 Mon Sep 17 00:00:00 2001
From: Matthew Farrellee
Date: Mon, 6 Oct 2025 13:27:30 -0400
Subject: [PATCH 01/14] chore: remove vLLM inference adapter's custom
 list_models (#3703)

# What does this PR do?

remove vLLM inference adapter's custom list_models impl and rely on the
standard impl instead

## Test Plan

ci
---
 .../providers/remote/inference/vllm/vllm.py | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 31241213a..4e7884cd2 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -19,7 +19,6 @@ from llama_stack.apis.inference import (
     OpenAIResponseFormatParam,
     ToolChoice,
 )
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
     HealthResponse,
@@ -58,21 +57,6 @@ class VLLMInferenceAdapter(OpenAIMixin):
         # Strictly respecting the refresh_models directive
         return self.config.refresh_models
 
-    async def list_models(self) -> list[Model] | None:
-        models = []
-        async for m in self.client.models.list():
-            model_type = ModelType.llm  # unclear how to determine embedding vs. llm models
-            models.append(
-                Model(
-                    identifier=m.id,
-                    provider_resource_id=m.id,
-                    provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    metadata={},
-                    model_type=model_type,
-                )
-            )
-        return models
-
     async def health(self) -> HealthResponse:
         """
         Performs a health check by verifying connectivity to the remote vLLM server.

From de9940c697c499f33a3e74a64aac2a454e73f3bb Mon Sep 17 00:00:00 2001
From: Matthew Farrellee
Date: Mon, 6 Oct 2025 13:27:40 -0400
Subject: [PATCH 02/14] chore: disable openai_embeddings on
 inference=remote::llama-openai-compat (#3704)

# What does this PR do?

api.llama.com does not provide embedding models; this makes that explicit

## Test Plan

ci
---
 .../remote/inference/llama_openai_compat/llama.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index 403680668..165992c16 100644
--- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 from typing import Any
 
-from llama_stack.apis.inference.inference import OpenAICompletion
+from llama_stack.apis.inference.inference import OpenAICompletion, OpenAIEmbeddingsResponse
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@@ -56,3 +56,13 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
         suffix: str | None = None,
     ) -> OpenAICompletion:
         raise NotImplementedError()
+
+    async def openai_embeddings(
+        self,
+        model: str,
+        input: str | list[str],
+        encoding_format: str | None = "float",
+        dimensions: int | None = None,
+        user: str | None = None,
+    ) -> OpenAIEmbeddingsResponse:
+        raise NotImplementedError()

From 892ea759faa051873f20ac6f19426969193c7b79 Mon Sep 17 00:00:00 2001
From: Matthew Farrellee
Date: Mon, 6 Oct 2025 13:28:36 -0400
Subject: [PATCH 03/14] chore: remove together inference adapter's custom
 check_model_availability (#3702)

# What does this PR do?

remove Together inference adapter's custom check_model_availability impl
and rely on the standard impl instead

## Test Plan

ci
---
 llama_stack/providers/remote/inference/together/together.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py
index 20669bef9..fbefe630f 100644
--- a/llama_stack/providers/remote/inference/together/together.py
+++ b/llama_stack/providers/remote/inference/together/together.py
@@ -66,9 +66,6 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
     async def should_refresh_models(self) -> bool:
         return True
 
-    async def check_model_availability(self, model):
-        return model in self._model_cache
-
     async def openai_embeddings(
         self,
         model: str,

From a8da6ba3a76f9e0fe81204f6a015aa62f7015e19 Mon Sep 17 00:00:00 2001
From: Alexey Rybak <50731695+reluctantfuturist@users.noreply.github.com>
Date: Mon, 6 Oct 2025 10:46:33 -0700
Subject: [PATCH 04/14] docs: API docstrings cleanup for better documentation
 rendering (#3661)

# What does this PR do?

* Cleans up API docstrings for better documentation rendering

## Test Plan

* Manual testing

---------

Signed-off-by: Doug Edgar
Signed-off-by: Charlie Doern
Signed-off-by: Francisco Javier Arceo
Signed-off-by: dependabot[bot]
Co-authored-by: ehhuang
Co-authored-by: Ashwin Bharambe
Co-authored-by: Matthew Farrellee
Co-authored-by: Doug Edgar
Co-authored-by: Christian Zaccaria <73656840+ChristianZaccaria@users.noreply.github.com>
Co-authored-by: Anastas Stoyanovsky
Co-authored-by: Charlie Doern
Co-authored-by: Francisco Arceo
Co-authored-by: Claude
Co-authored-by: Young Han <110819238+seyeong-han@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 docs/docs/providers/files/index.mdx | 7 +
 docs/docs/providers/inference/index.mdx | 8 +-
 docs/docs/providers/safety/index.mdx | 7 +
 docs/static/deprecated-llama-stack-spec.html | 74 +++----
 docs/static/deprecated-llama-stack-spec.yaml | 97 +++++----
 docs/static/llama-stack-spec.html | 145 ++++++------
 docs/static/llama-stack-spec.yaml | 203 ++++++++++++-------
 docs/static/stainless-llama-stack-spec.html | 145 ++++++------
 docs/static/stainless-llama-stack-spec.yaml | 203 ++++++++++++-------
 llama_stack/apis/agents/agents.py | 10 +-
 llama_stack/apis/files/files.py | 20 +-
 llama_stack/apis/inference/inference.py | 22 +-
 llama_stack/apis/inspect/inspect.py | 17 +-
 llama_stack/apis/models/models.py | 12 +-
 llama_stack/apis/prompts/prompts.py | 28 ++-
 llama_stack/apis/providers/providers.py | 11 +-
 llama_stack/apis/safety/safety.py | 13 +-
 17 files changed, 611 insertions(+), 411 deletions(-)

diff --git a/docs/docs/providers/files/index.mdx b/docs/docs/providers/files/index.mdx
index 7d729d90f..19e338035 100644
--- a/docs/docs/providers/files/index.mdx
+++ b/docs/docs/providers/files/index.mdx
@@ -1,4 +1,7 @@
 ---
+description: "Files
+
+  This API is used to upload documents that can be used with other Llama Stack APIs."
 sidebar_label: Files
 title: Files
 ---
@@ -7,4 +10,8 @@ title: Files
 
 ## Overview
 
+Files
+
+This API is used to upload documents that can be used with other Llama Stack APIs.
+
 This section contains documentation for all available providers for the **files** API.
diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index ebbaf1be1..c2bf69962 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -1,5 +1,7 @@ --- -description: "Llama Stack Inference API for generating completions, chat completions, and embeddings. +description: "Inference + + Llama Stack Inference API for generating completions, chat completions, and embeddings. This API provides the raw interface to the underlying models. Two kinds of models are supported: - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. @@ -12,7 +14,9 @@ title: Inference ## Overview -Llama Stack Inference API for generating completions, chat completions, and embeddings. +Inference + + Llama Stack Inference API for generating completions, chat completions, and embeddings. This API provides the raw interface to the underlying models. Two kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. diff --git a/docs/docs/providers/safety/index.mdx b/docs/docs/providers/safety/index.mdx index 3445b17e6..4e2de4f33 100644 --- a/docs/docs/providers/safety/index.mdx +++ b/docs/docs/providers/safety/index.mdx @@ -1,4 +1,7 @@ --- +description: "Safety + + OpenAI-compatible Moderations API." sidebar_label: Safety title: Safety --- @@ -7,4 +10,8 @@ title: Safety ## Overview +Safety + + OpenAI-compatible Moderations API. + This section contains documentation for all available providers for the **safety** API. diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index ffda7552b..04a3dca9b 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -1443,8 +1443,8 @@ "tags": [ "Inference" ], - "summary": "List all chat completions.", - "description": "List all chat completions.", + "summary": "List chat completions.", + "description": "List chat completions.", "parameters": [ { "name": "after", @@ -1520,8 +1520,8 @@ "tags": [ "Inference" ], - "summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.", - "description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.", + "summary": "Create chat completions.", + "description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.", "parameters": [], "requestBody": { "content": { @@ -1565,8 +1565,8 @@ "tags": [ "Inference" ], - "summary": "Describe a chat completion by its ID.", - "description": "Describe a chat completion by its ID.", + "summary": "Get chat completion.", + "description": "Get chat completion.\nDescribe a chat completion by its ID.", "parameters": [ { "name": "completion_id", @@ -1610,8 +1610,8 @@ "tags": [ "Inference" ], - "summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.", - "description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.", + "summary": "Create completion.", + "description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.", "parameters": [], "requestBody": { "content": { @@ -1655,8 +1655,8 @@ "tags": [ "Inference" ], - "summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.", - "description": "Generate 
OpenAI-compatible embeddings for the given input using the specified model.", + "summary": "Create embeddings.", + "description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.", "parameters": [], "requestBody": { "content": { @@ -1700,8 +1700,8 @@ "tags": [ "Files" ], - "summary": "Returns a list of files that belong to the user's organization.", - "description": "Returns a list of files that belong to the user's organization.", + "summary": "List files.", + "description": "List files.\nReturns a list of files that belong to the user's organization.", "parameters": [ { "name": "after", @@ -1770,8 +1770,8 @@ "tags": [ "Files" ], - "summary": "Upload a file that can be used across various endpoints.", - "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.", + "summary": "Upload file.", + "description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.", "parameters": [], "requestBody": { "content": { @@ -1831,8 +1831,8 @@ "tags": [ "Files" ], - "summary": "Returns information about a specific file.", - "description": "Returns information about a specific file.", + "summary": "Retrieve file.", + "description": "Retrieve file.\nReturns information about a specific file.", "parameters": [ { "name": "file_id", @@ -1874,8 +1874,8 @@ "tags": [ "Files" ], - "summary": "Delete a file.", - "description": "Delete a file.", + "summary": "Delete file.", + "description": "Delete file.", "parameters": [ { "name": "file_id", @@ -1919,8 +1919,8 @@ "tags": [ "Files" ], - "summary": "Returns the contents of the specified file.", - "description": "Returns the contents of the specified file.", + "summary": "Retrieve file content.", + "description": "Retrieve file content.\nReturns the contents of the specified file.", "parameters": [ { "name": "file_id", @@ -1999,8 +1999,8 @@ "tags": [ "Safety" ], - "summary": "Classifies if text and/or image inputs are potentially harmful.", - "description": "Classifies if text and/or image inputs are potentially harmful.", + "summary": "Create moderation.", + "description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.", "parameters": [], "requestBody": { "content": { @@ -2044,8 +2044,8 @@ "tags": [ "Agents" ], - "summary": "List all OpenAI responses.", - "description": "List all OpenAI responses.", + "summary": "List all responses.", + "description": "List all responses.", "parameters": [ { "name": "after", @@ -2119,8 +2119,8 @@ "tags": [ "Agents" ], - "summary": "Create a new OpenAI response.", - "description": "Create a new OpenAI response.", + "summary": "Create a model response.", + "description": "Create a model response.", "parameters": [], "requestBody": { "content": { @@ -2184,8 +2184,8 @@ "tags": [ "Agents" ], - "summary": "Retrieve an OpenAI response by its ID.", - "description": "Retrieve an OpenAI response by its ID.", + "summary": "Get a model response.", + "description": "Get a model response.", "parameters": [ { "name": "response_id", @@ 
-2227,8 +2227,8 @@ "tags": [ "Agents" ], - "summary": "Delete an OpenAI response by its ID.", - "description": "Delete an OpenAI response by its ID.", + "summary": "Delete a response.", + "description": "Delete a response.", "parameters": [ { "name": "response_id", @@ -2272,8 +2272,8 @@ "tags": [ "Agents" ], - "summary": "List input items for a given OpenAI response.", - "description": "List input items for a given OpenAI response.", + "summary": "List input items.", + "description": "List input items.", "parameters": [ { "name": "response_id", @@ -13366,12 +13366,13 @@ }, { "name": "Files", - "description": "" + "description": "This API is used to upload documents that can be used with other Llama Stack APIs.", + "x-displayName": "Files" }, { "name": "Inference", - "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", - "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings." + "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", + "x-displayName": "Inference" }, { "name": "Models", @@ -13383,7 +13384,8 @@ }, { "name": "Safety", - "description": "" + "description": "OpenAI-compatible Moderations API.", + "x-displayName": "Safety" }, { "name": "Telemetry", diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 0e672f914..1a215b877 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -1033,8 +1033,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: List all chat completions. - description: List all chat completions. + summary: List chat completions. + description: List chat completions. parameters: - name: after in: query @@ -1087,10 +1087,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: >- - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. + summary: Create chat completions. description: >- + Create chat completions. + Generate an OpenAI-compatible chat completion for the given messages using the specified model. parameters: [] @@ -1122,8 +1122,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: Describe a chat completion by its ID. - description: Describe a chat completion by its ID. + summary: Get chat completion. + description: >- + Get chat completion. + + Describe a chat completion by its ID. parameters: - name: completion_id in: path @@ -1153,10 +1156,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: >- - Generate an OpenAI-compatible completion for the given prompt using the specified - model. + summary: Create completion. description: >- + Create completion. + Generate an OpenAI-compatible completion for the given prompt using the specified model. 
parameters: [] @@ -1189,10 +1192,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: >- - Generate OpenAI-compatible embeddings for the given input using the specified - model. + summary: Create embeddings. description: >- + Create embeddings. + Generate OpenAI-compatible embeddings for the given input using the specified model. parameters: [] @@ -1225,9 +1228,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: >- - Returns a list of files that belong to the user's organization. + summary: List files. description: >- + List files. + Returns a list of files that belong to the user's organization. parameters: - name: after @@ -1285,11 +1289,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: >- - Upload a file that can be used across various endpoints. + summary: Upload file. description: >- + Upload file. + Upload a file that can be used across various endpoints. + The file upload should be a multipart form request with: - file: The File object (not file name) to be uploaded. @@ -1338,9 +1344,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: >- - Returns information about a specific file. + summary: Retrieve file. description: >- + Retrieve file. + Returns information about a specific file. parameters: - name: file_id @@ -1372,8 +1379,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: Delete a file. - description: Delete a file. + summary: Delete file. + description: Delete file. parameters: - name: file_id in: path @@ -1405,9 +1412,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: >- - Returns the contents of the specified file. + summary: Retrieve file content. description: >- + Retrieve file content. + Returns the contents of the specified file. parameters: - name: file_id @@ -1464,9 +1472,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Safety - summary: >- - Classifies if text and/or image inputs are potentially harmful. + summary: Create moderation. description: >- + Create moderation. + Classifies if text and/or image inputs are potentially harmful. parameters: [] requestBody: @@ -1497,8 +1506,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: List all OpenAI responses. - description: List all OpenAI responses. + summary: List all responses. + description: List all responses. parameters: - name: after in: query @@ -1549,8 +1558,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: Create a new OpenAI response. - description: Create a new OpenAI response. + summary: Create a model response. + description: Create a model response. parameters: [] requestBody: content: @@ -1592,8 +1601,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: Retrieve an OpenAI response by its ID. - description: Retrieve an OpenAI response by its ID. + summary: Get a model response. + description: Get a model response. parameters: - name: response_id in: path @@ -1623,8 +1632,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: Delete an OpenAI response by its ID. - description: Delete an OpenAI response by its ID. + summary: Delete a response. + description: Delete a response. parameters: - name: response_id in: path @@ -1654,10 +1663,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: >- - List input items for a given OpenAI response. 
- description: >- - List input items for a given OpenAI response. + summary: List input items. + description: List input items. parameters: - name: response_id in: path @@ -10011,9 +10018,16 @@ tags: x-displayName: >- Llama Stack Evaluation API for running evaluations on model and agent candidates. - name: Files - description: '' + description: >- + This API is used to upload documents that can be used with other Llama Stack + APIs. + x-displayName: Files - name: Inference description: >- + Llama Stack Inference API for generating completions, chat completions, and + embeddings. + + This API provides the raw interface to the underlying models. Two kinds of models are supported: @@ -10021,15 +10035,14 @@ tags: - Embedding models: these models generate embeddings to be used for semantic search. - x-displayName: >- - Llama Stack Inference API for generating completions, chat completions, and - embeddings. + x-displayName: Inference - name: Models description: '' - name: PostTraining (Coming Soon) description: '' - name: Safety - description: '' + description: OpenAI-compatible Moderations API. + x-displayName: Safety - name: Telemetry description: '' - name: VectorIO diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index c570dcddf..9cd526176 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -69,8 +69,8 @@ "tags": [ "Inference" ], - "summary": "List all chat completions.", - "description": "List all chat completions.", + "summary": "List chat completions.", + "description": "List chat completions.", "parameters": [ { "name": "after", @@ -146,8 +146,8 @@ "tags": [ "Inference" ], - "summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.", - "description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.", + "summary": "Create chat completions.", + "description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.", "parameters": [], "requestBody": { "content": { @@ -191,8 +191,8 @@ "tags": [ "Inference" ], - "summary": "Describe a chat completion by its ID.", - "description": "Describe a chat completion by its ID.", + "summary": "Get chat completion.", + "description": "Get chat completion.\nDescribe a chat completion by its ID.", "parameters": [ { "name": "completion_id", @@ -236,8 +236,8 @@ "tags": [ "Inference" ], - "summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.", - "description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.", + "summary": "Create completion.", + "description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.", "parameters": [], "requestBody": { "content": { @@ -758,8 +758,8 @@ "tags": [ "Inference" ], - "summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.", - "description": "Generate OpenAI-compatible embeddings for the given input using the specified model.", + "summary": "Create embeddings.", + "description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.", "parameters": [], "requestBody": { "content": { @@ -803,8 +803,8 @@ "tags": [ "Files" ], - "summary": "Returns a list of files that belong to the user's organization.", - "description": "Returns a list of files that 
belong to the user's organization.", + "summary": "List files.", + "description": "List files.\nReturns a list of files that belong to the user's organization.", "parameters": [ { "name": "after", @@ -873,8 +873,8 @@ "tags": [ "Files" ], - "summary": "Upload a file that can be used across various endpoints.", - "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.", + "summary": "Upload file.", + "description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.", "parameters": [], "requestBody": { "content": { @@ -934,8 +934,8 @@ "tags": [ "Files" ], - "summary": "Returns information about a specific file.", - "description": "Returns information about a specific file.", + "summary": "Retrieve file.", + "description": "Retrieve file.\nReturns information about a specific file.", "parameters": [ { "name": "file_id", @@ -977,8 +977,8 @@ "tags": [ "Files" ], - "summary": "Delete a file.", - "description": "Delete a file.", + "summary": "Delete file.", + "description": "Delete file.", "parameters": [ { "name": "file_id", @@ -1022,8 +1022,8 @@ "tags": [ "Files" ], - "summary": "Returns the contents of the specified file.", - "description": "Returns the contents of the specified file.", + "summary": "Retrieve file content.", + "description": "Retrieve file content.\nReturns the contents of the specified file.", "parameters": [ { "name": "file_id", @@ -1067,8 +1067,8 @@ "tags": [ "Inspect" ], - "summary": "Get the current health status of the service.", - "description": "Get the current health status of the service.", + "summary": "Get health status.", + "description": "Get health status.\nGet the current health status of the service.", "parameters": [], "deprecated": false } @@ -1102,8 +1102,8 @@ "tags": [ "Inspect" ], - "summary": "List all available API routes with their methods and implementing providers.", - "description": "List all available API routes with their methods and implementing providers.", + "summary": "List routes.", + "description": "List routes.\nList all available API routes with their methods and implementing providers.", "parameters": [], "deprecated": false } @@ -1170,8 +1170,8 @@ "tags": [ "Models" ], - "summary": "Register a model.", - "description": "Register a model.", + "summary": "Register model.", + "description": "Register model.\nRegister a model.", "parameters": [], "requestBody": { "content": { @@ -1215,8 +1215,8 @@ "tags": [ "Models" ], - "summary": "Get a model by its identifier.", - "description": "Get a model by its identifier.", + "summary": "Get model.", + "description": "Get model.\nGet a model by its identifier.", "parameters": [ { "name": "model_id", @@ -1251,8 +1251,8 @@ "tags": [ "Models" ], - "summary": "Unregister a model.", - "description": "Unregister a model.", + "summary": "Unregister model.", + "description": "Unregister model.\nUnregister a model.", "parameters": [ { "name": "model_id", @@ -1296,8 +1296,8 @@ "tags": [ "Safety" ], - "summary": "Classifies if text and/or image inputs are potentially harmful.", - 
"description": "Classifies if text and/or image inputs are potentially harmful.", + "summary": "Create moderation.", + "description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.", "parameters": [], "requestBody": { "content": { @@ -1374,8 +1374,8 @@ "tags": [ "Prompts" ], - "summary": "Create a new prompt.", - "description": "Create a new prompt.", + "summary": "Create prompt.", + "description": "Create prompt.\nCreate a new prompt.", "parameters": [], "requestBody": { "content": { @@ -1419,8 +1419,8 @@ "tags": [ "Prompts" ], - "summary": "Get a prompt by its identifier and optional version.", - "description": "Get a prompt by its identifier and optional version.", + "summary": "Get prompt.", + "description": "Get prompt.\nGet a prompt by its identifier and optional version.", "parameters": [ { "name": "prompt_id", @@ -1471,8 +1471,8 @@ "tags": [ "Prompts" ], - "summary": "Update an existing prompt (increments version).", - "description": "Update an existing prompt (increments version).", + "summary": "Update prompt.", + "description": "Update prompt.\nUpdate an existing prompt (increments version).", "parameters": [ { "name": "prompt_id", @@ -1517,8 +1517,8 @@ "tags": [ "Prompts" ], - "summary": "Delete a prompt.", - "description": "Delete a prompt.", + "summary": "Delete prompt.", + "description": "Delete prompt.\nDelete a prompt.", "parameters": [ { "name": "prompt_id", @@ -1562,8 +1562,8 @@ "tags": [ "Prompts" ], - "summary": "Set which version of a prompt should be the default in get_prompt (latest).", - "description": "Set which version of a prompt should be the default in get_prompt (latest).", + "summary": "Set prompt version.", + "description": "Set prompt version.\nSet which version of a prompt should be the default in get_prompt (latest).", "parameters": [ { "name": "prompt_id", @@ -1617,8 +1617,8 @@ "tags": [ "Prompts" ], - "summary": "List all versions of a specific prompt.", - "description": "List all versions of a specific prompt.", + "summary": "List prompt versions.", + "description": "List prompt versions.\nList all versions of a specific prompt.", "parameters": [ { "name": "prompt_id", @@ -1662,8 +1662,8 @@ "tags": [ "Providers" ], - "summary": "List all available providers.", - "description": "List all available providers.", + "summary": "List providers.", + "description": "List providers.\nList all available providers.", "parameters": [], "deprecated": false } @@ -1697,8 +1697,8 @@ "tags": [ "Providers" ], - "summary": "Get detailed information about a specific provider.", - "description": "Get detailed information about a specific provider.", + "summary": "Get provider.", + "description": "Get provider.\nGet detailed information about a specific provider.", "parameters": [ { "name": "provider_id", @@ -1742,8 +1742,8 @@ "tags": [ "Agents" ], - "summary": "List all OpenAI responses.", - "description": "List all OpenAI responses.", + "summary": "List all responses.", + "description": "List all responses.", "parameters": [ { "name": "after", @@ -1817,8 +1817,8 @@ "tags": [ "Agents" ], - "summary": "Create a new OpenAI response.", - "description": "Create a new OpenAI response.", + "summary": "Create a model response.", + "description": "Create a model response.", "parameters": [], "requestBody": { "content": { @@ -1882,8 +1882,8 @@ "tags": [ "Agents" ], - "summary": "Retrieve an OpenAI response by its ID.", - "description": "Retrieve an OpenAI response by its ID.", + "summary": "Get a model response.", + "description": "Get a model 
response.", "parameters": [ { "name": "response_id", @@ -1925,8 +1925,8 @@ "tags": [ "Agents" ], - "summary": "Delete an OpenAI response by its ID.", - "description": "Delete an OpenAI response by its ID.", + "summary": "Delete a response.", + "description": "Delete a response.", "parameters": [ { "name": "response_id", @@ -1970,8 +1970,8 @@ "tags": [ "Agents" ], - "summary": "List input items for a given OpenAI response.", - "description": "List input items for a given OpenAI response.", + "summary": "List input items.", + "description": "List input items.", "parameters": [ { "name": "response_id", @@ -2063,8 +2063,8 @@ "tags": [ "Safety" ], - "summary": "Run a shield.", - "description": "Run a shield.", + "summary": "Run shield.", + "description": "Run shield.\nRun a shield.", "parameters": [], "requestBody": { "content": { @@ -4196,8 +4196,8 @@ "tags": [ "Inspect" ], - "summary": "Get the version of the service.", - "description": "Get the version of the service.", + "summary": "Get version.", + "description": "Get version.\nGet the version of the service.", "parameters": [], "deprecated": false } @@ -12914,16 +12914,18 @@ }, { "name": "Files", - "description": "" + "description": "This API is used to upload documents that can be used with other Llama Stack APIs.", + "x-displayName": "Files" }, { "name": "Inference", - "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", - "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings." + "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", + "x-displayName": "Inference" }, { "name": "Inspect", - "description": "" + "description": "APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.", + "x-displayName": "Inspect" }, { "name": "Models", @@ -12931,17 +12933,18 @@ }, { "name": "Prompts", - "description": "", - "x-displayName": "Protocol for prompt management operations." + "description": "Protocol for prompt management operations.", + "x-displayName": "Prompts" }, { "name": "Providers", - "description": "", - "x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations." + "description": "Providers API for inspecting, listing, and modifying providers and their configurations.", + "x-displayName": "Providers" }, { "name": "Safety", - "description": "" + "description": "OpenAI-compatible Moderations API.", + "x-displayName": "Safety" }, { "name": "Scoring", diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 3e1431b22..66ce8e38a 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -33,8 +33,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: List all chat completions. - description: List all chat completions. + summary: List chat completions. + description: List chat completions. 
parameters: - name: after in: query @@ -87,10 +87,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: >- - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. + summary: Create chat completions. description: >- + Create chat completions. + Generate an OpenAI-compatible chat completion for the given messages using the specified model. parameters: [] @@ -122,8 +122,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: Describe a chat completion by its ID. - description: Describe a chat completion by its ID. + summary: Get chat completion. + description: >- + Get chat completion. + + Describe a chat completion by its ID. parameters: - name: completion_id in: path @@ -153,10 +156,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: >- - Generate an OpenAI-compatible completion for the given prompt using the specified - model. + summary: Create completion. description: >- + Create completion. + Generate an OpenAI-compatible completion for the given prompt using the specified model. parameters: [] @@ -603,10 +606,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: >- - Generate OpenAI-compatible embeddings for the given input using the specified - model. + summary: Create embeddings. description: >- + Create embeddings. + Generate OpenAI-compatible embeddings for the given input using the specified model. parameters: [] @@ -639,9 +642,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: >- - Returns a list of files that belong to the user's organization. + summary: List files. description: >- + List files. + Returns a list of files that belong to the user's organization. parameters: - name: after @@ -699,11 +703,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: >- - Upload a file that can be used across various endpoints. + summary: Upload file. description: >- + Upload file. + Upload a file that can be used across various endpoints. + The file upload should be a multipart form request with: - file: The File object (not file name) to be uploaded. @@ -752,9 +758,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: >- - Returns information about a specific file. + summary: Retrieve file. description: >- + Retrieve file. + Returns information about a specific file. parameters: - name: file_id @@ -786,8 +793,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: Delete a file. - description: Delete a file. + summary: Delete file. + description: Delete file. parameters: - name: file_id in: path @@ -819,9 +826,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: >- - Returns the contents of the specified file. + summary: Retrieve file content. description: >- + Retrieve file content. + Returns the contents of the specified file. parameters: - name: file_id @@ -854,9 +862,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inspect - summary: >- - Get the current health status of the service. + summary: Get health status. description: >- + Get health status. + Get the current health status of the service. parameters: [] deprecated: false @@ -882,9 +891,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inspect - summary: >- - List all available API routes with their methods and implementing providers. + summary: List routes. description: >- + List routes. 
+ List all available API routes with their methods and implementing providers. parameters: [] deprecated: false @@ -933,8 +943,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Register a model. - description: Register a model. + summary: Register model. + description: >- + Register model. + + Register a model. parameters: [] requestBody: content: @@ -964,8 +977,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Get a model by its identifier. - description: Get a model by its identifier. + summary: Get model. + description: >- + Get model. + + Get a model by its identifier. parameters: - name: model_id in: path @@ -990,8 +1006,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Unregister a model. - description: Unregister a model. + summary: Unregister model. + description: >- + Unregister model. + + Unregister a model. parameters: - name: model_id in: path @@ -1022,9 +1041,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Safety - summary: >- - Classifies if text and/or image inputs are potentially harmful. + summary: Create moderation. description: >- + Create moderation. + Classifies if text and/or image inputs are potentially harmful. parameters: [] requestBody: @@ -1080,8 +1100,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Prompts - summary: Create a new prompt. - description: Create a new prompt. + summary: Create prompt. + description: >- + Create prompt. + + Create a new prompt. parameters: [] requestBody: content: @@ -1111,9 +1134,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Prompts - summary: >- - Get a prompt by its identifier and optional version. + summary: Get prompt. description: >- + Get prompt. + Get a prompt by its identifier and optional version. parameters: - name: prompt_id @@ -1151,9 +1175,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Prompts - summary: >- - Update an existing prompt (increments version). + summary: Update prompt. description: >- + Update prompt. + Update an existing prompt (increments version). parameters: - name: prompt_id @@ -1185,8 +1210,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Prompts - summary: Delete a prompt. - description: Delete a prompt. + summary: Delete prompt. + description: >- + Delete prompt. + + Delete a prompt. parameters: - name: prompt_id in: path @@ -1217,9 +1245,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Prompts - summary: >- - Set which version of a prompt should be the default in get_prompt (latest). + summary: Set prompt version. description: >- + Set prompt version. + Set which version of a prompt should be the default in get_prompt (latest). parameters: - name: prompt_id @@ -1257,8 +1286,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Prompts - summary: List all versions of a specific prompt. - description: List all versions of a specific prompt. + summary: List prompt versions. + description: >- + List prompt versions. + + List all versions of a specific prompt. parameters: - name: prompt_id in: path @@ -1290,8 +1322,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Providers - summary: List all available providers. - description: List all available providers. + summary: List providers. + description: >- + List providers. + + List all available providers. 
parameters: [] deprecated: false /v1/providers/{provider_id}: @@ -1316,9 +1351,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Providers - summary: >- - Get detailed information about a specific provider. + summary: Get provider. description: >- + Get provider. + Get detailed information about a specific provider. parameters: - name: provider_id @@ -1349,8 +1385,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: List all OpenAI responses. - description: List all OpenAI responses. + summary: List all responses. + description: List all responses. parameters: - name: after in: query @@ -1401,8 +1437,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: Create a new OpenAI response. - description: Create a new OpenAI response. + summary: Create a model response. + description: Create a model response. parameters: [] requestBody: content: @@ -1444,8 +1480,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: Retrieve an OpenAI response by its ID. - description: Retrieve an OpenAI response by its ID. + summary: Get a model response. + description: Get a model response. parameters: - name: response_id in: path @@ -1475,8 +1511,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: Delete an OpenAI response by its ID. - description: Delete an OpenAI response by its ID. + summary: Delete a response. + description: Delete a response. parameters: - name: response_id in: path @@ -1506,10 +1542,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: >- - List input items for a given OpenAI response. - description: >- - List input items for a given OpenAI response. + summary: List input items. + description: List input items. parameters: - name: response_id in: path @@ -1578,8 +1612,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Safety - summary: Run a shield. - description: Run a shield. + summary: Run shield. + description: >- + Run shield. + + Run a shield. parameters: [] requestBody: content: @@ -3135,8 +3172,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inspect - summary: Get the version of the service. - description: Get the version of the service. + summary: Get version. + description: >- + Get version. + + Get the version of the service. parameters: [] deprecated: false jsonSchemaDialect: >- @@ -9749,9 +9789,16 @@ tags: x-displayName: >- Protocol for conversation management operations. - name: Files - description: '' + description: >- + This API is used to upload documents that can be used with other Llama Stack + APIs. + x-displayName: Files - name: Inference description: >- + Llama Stack Inference API for generating completions, chat completions, and + embeddings. + + This API provides the raw interface to the underlying models. Two kinds of models are supported: @@ -9759,23 +9806,25 @@ tags: - Embedding models: these models generate embeddings to be used for semantic search. - x-displayName: >- - Llama Stack Inference API for generating completions, chat completions, and - embeddings. + x-displayName: Inference - name: Inspect - description: '' + description: >- + APIs for inspecting the Llama Stack service, including health status, available + API routes with methods and implementing providers. + x-displayName: Inspect - name: Models description: '' - name: Prompts - description: '' - x-displayName: >- + description: >- Protocol for prompt management operations. 
+ x-displayName: Prompts - name: Providers - description: '' - x-displayName: >- + description: >- Providers API for inspecting, listing, and modifying providers and their configurations. + x-displayName: Providers - name: Safety - description: '' + description: OpenAI-compatible Moderations API. + x-displayName: Safety - name: Scoring description: '' - name: ScoringFunctions diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 167a4aa3c..3478d3338 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -69,8 +69,8 @@ "tags": [ "Inference" ], - "summary": "List all chat completions.", - "description": "List all chat completions.", + "summary": "List chat completions.", + "description": "List chat completions.", "parameters": [ { "name": "after", @@ -146,8 +146,8 @@ "tags": [ "Inference" ], - "summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.", - "description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.", + "summary": "Create chat completions.", + "description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.", "parameters": [], "requestBody": { "content": { @@ -191,8 +191,8 @@ "tags": [ "Inference" ], - "summary": "Describe a chat completion by its ID.", - "description": "Describe a chat completion by its ID.", + "summary": "Get chat completion.", + "description": "Get chat completion.\nDescribe a chat completion by its ID.", "parameters": [ { "name": "completion_id", @@ -236,8 +236,8 @@ "tags": [ "Inference" ], - "summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.", - "description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.", + "summary": "Create completion.", + "description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.", "parameters": [], "requestBody": { "content": { @@ -758,8 +758,8 @@ "tags": [ "Inference" ], - "summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.", - "description": "Generate OpenAI-compatible embeddings for the given input using the specified model.", + "summary": "Create embeddings.", + "description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.", "parameters": [], "requestBody": { "content": { @@ -803,8 +803,8 @@ "tags": [ "Files" ], - "summary": "Returns a list of files that belong to the user's organization.", - "description": "Returns a list of files that belong to the user's organization.", + "summary": "List files.", + "description": "List files.\nReturns a list of files that belong to the user's organization.", "parameters": [ { "name": "after", @@ -873,8 +873,8 @@ "tags": [ "Files" ], - "summary": "Upload a file that can be used across various endpoints.", - "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.", + "summary": "Upload file.", + "description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload 
should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.", "parameters": [], "requestBody": { "content": { @@ -934,8 +934,8 @@ "tags": [ "Files" ], - "summary": "Returns information about a specific file.", - "description": "Returns information about a specific file.", + "summary": "Retrieve file.", + "description": "Retrieve file.\nReturns information about a specific file.", "parameters": [ { "name": "file_id", @@ -977,8 +977,8 @@ "tags": [ "Files" ], - "summary": "Delete a file.", - "description": "Delete a file.", + "summary": "Delete file.", + "description": "Delete file.", "parameters": [ { "name": "file_id", @@ -1022,8 +1022,8 @@ "tags": [ "Files" ], - "summary": "Returns the contents of the specified file.", - "description": "Returns the contents of the specified file.", + "summary": "Retrieve file content.", + "description": "Retrieve file content.\nReturns the contents of the specified file.", "parameters": [ { "name": "file_id", @@ -1067,8 +1067,8 @@ "tags": [ "Inspect" ], - "summary": "Get the current health status of the service.", - "description": "Get the current health status of the service.", + "summary": "Get health status.", + "description": "Get health status.\nGet the current health status of the service.", "parameters": [], "deprecated": false } @@ -1102,8 +1102,8 @@ "tags": [ "Inspect" ], - "summary": "List all available API routes with their methods and implementing providers.", - "description": "List all available API routes with their methods and implementing providers.", + "summary": "List routes.", + "description": "List routes.\nList all available API routes with their methods and implementing providers.", "parameters": [], "deprecated": false } @@ -1170,8 +1170,8 @@ "tags": [ "Models" ], - "summary": "Register a model.", - "description": "Register a model.", + "summary": "Register model.", + "description": "Register model.\nRegister a model.", "parameters": [], "requestBody": { "content": { @@ -1215,8 +1215,8 @@ "tags": [ "Models" ], - "summary": "Get a model by its identifier.", - "description": "Get a model by its identifier.", + "summary": "Get model.", + "description": "Get model.\nGet a model by its identifier.", "parameters": [ { "name": "model_id", @@ -1251,8 +1251,8 @@ "tags": [ "Models" ], - "summary": "Unregister a model.", - "description": "Unregister a model.", + "summary": "Unregister model.", + "description": "Unregister model.\nUnregister a model.", "parameters": [ { "name": "model_id", @@ -1296,8 +1296,8 @@ "tags": [ "Safety" ], - "summary": "Classifies if text and/or image inputs are potentially harmful.", - "description": "Classifies if text and/or image inputs are potentially harmful.", + "summary": "Create moderation.", + "description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.", "parameters": [], "requestBody": { "content": { @@ -1374,8 +1374,8 @@ "tags": [ "Prompts" ], - "summary": "Create a new prompt.", - "description": "Create a new prompt.", + "summary": "Create prompt.", + "description": "Create prompt.\nCreate a new prompt.", "parameters": [], "requestBody": { "content": { @@ -1419,8 +1419,8 @@ "tags": [ "Prompts" ], - "summary": "Get a prompt by its identifier and optional version.", - "description": "Get a prompt by its identifier and optional version.", + "summary": "Get prompt.", + "description": "Get prompt.\nGet a 
prompt by its identifier and optional version.", "parameters": [ { "name": "prompt_id", @@ -1471,8 +1471,8 @@ "tags": [ "Prompts" ], - "summary": "Update an existing prompt (increments version).", - "description": "Update an existing prompt (increments version).", + "summary": "Update prompt.", + "description": "Update prompt.\nUpdate an existing prompt (increments version).", "parameters": [ { "name": "prompt_id", @@ -1517,8 +1517,8 @@ "tags": [ "Prompts" ], - "summary": "Delete a prompt.", - "description": "Delete a prompt.", + "summary": "Delete prompt.", + "description": "Delete prompt.\nDelete a prompt.", "parameters": [ { "name": "prompt_id", @@ -1562,8 +1562,8 @@ "tags": [ "Prompts" ], - "summary": "Set which version of a prompt should be the default in get_prompt (latest).", - "description": "Set which version of a prompt should be the default in get_prompt (latest).", + "summary": "Set prompt version.", + "description": "Set prompt version.\nSet which version of a prompt should be the default in get_prompt (latest).", "parameters": [ { "name": "prompt_id", @@ -1617,8 +1617,8 @@ "tags": [ "Prompts" ], - "summary": "List all versions of a specific prompt.", - "description": "List all versions of a specific prompt.", + "summary": "List prompt versions.", + "description": "List prompt versions.\nList all versions of a specific prompt.", "parameters": [ { "name": "prompt_id", @@ -1662,8 +1662,8 @@ "tags": [ "Providers" ], - "summary": "List all available providers.", - "description": "List all available providers.", + "summary": "List providers.", + "description": "List providers.\nList all available providers.", "parameters": [], "deprecated": false } @@ -1697,8 +1697,8 @@ "tags": [ "Providers" ], - "summary": "Get detailed information about a specific provider.", - "description": "Get detailed information about a specific provider.", + "summary": "Get provider.", + "description": "Get provider.\nGet detailed information about a specific provider.", "parameters": [ { "name": "provider_id", @@ -1742,8 +1742,8 @@ "tags": [ "Agents" ], - "summary": "List all OpenAI responses.", - "description": "List all OpenAI responses.", + "summary": "List all responses.", + "description": "List all responses.", "parameters": [ { "name": "after", @@ -1817,8 +1817,8 @@ "tags": [ "Agents" ], - "summary": "Create a new OpenAI response.", - "description": "Create a new OpenAI response.", + "summary": "Create a model response.", + "description": "Create a model response.", "parameters": [], "requestBody": { "content": { @@ -1882,8 +1882,8 @@ "tags": [ "Agents" ], - "summary": "Retrieve an OpenAI response by its ID.", - "description": "Retrieve an OpenAI response by its ID.", + "summary": "Get a model response.", + "description": "Get a model response.", "parameters": [ { "name": "response_id", @@ -1925,8 +1925,8 @@ "tags": [ "Agents" ], - "summary": "Delete an OpenAI response by its ID.", - "description": "Delete an OpenAI response by its ID.", + "summary": "Delete a response.", + "description": "Delete a response.", "parameters": [ { "name": "response_id", @@ -1970,8 +1970,8 @@ "tags": [ "Agents" ], - "summary": "List input items for a given OpenAI response.", - "description": "List input items for a given OpenAI response.", + "summary": "List input items.", + "description": "List input items.", "parameters": [ { "name": "response_id", @@ -2063,8 +2063,8 @@ "tags": [ "Safety" ], - "summary": "Run a shield.", - "description": "Run a shield.", + "summary": "Run shield.", + "description": "Run shield.\nRun a 
shield.", "parameters": [], "requestBody": { "content": { @@ -4196,8 +4196,8 @@ "tags": [ "Inspect" ], - "summary": "Get the version of the service.", - "description": "Get the version of the service.", + "summary": "Get version.", + "description": "Get version.\nGet the version of the service.", "parameters": [], "deprecated": false } @@ -18487,16 +18487,18 @@ }, { "name": "Files", - "description": "" + "description": "This API is used to upload documents that can be used with other Llama Stack APIs.", + "x-displayName": "Files" }, { "name": "Inference", - "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", - "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings." + "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", + "x-displayName": "Inference" }, { "name": "Inspect", - "description": "" + "description": "APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.", + "x-displayName": "Inspect" }, { "name": "Models", @@ -18508,17 +18510,18 @@ }, { "name": "Prompts", - "description": "", - "x-displayName": "Protocol for prompt management operations." + "description": "Protocol for prompt management operations.", + "x-displayName": "Prompts" }, { "name": "Providers", - "description": "", - "x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations." + "description": "Providers API for inspecting, listing, and modifying providers and their configurations.", + "x-displayName": "Providers" }, { "name": "Safety", - "description": "" + "description": "OpenAI-compatible Moderations API.", + "x-displayName": "Safety" }, { "name": "Scoring", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 6dc1041f1..6c04542bf 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -36,8 +36,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: List all chat completions. - description: List all chat completions. + summary: List chat completions. + description: List chat completions. parameters: - name: after in: query @@ -90,10 +90,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: >- - Generate an OpenAI-compatible chat completion for the given messages using - the specified model. + summary: Create chat completions. description: >- + Create chat completions. + Generate an OpenAI-compatible chat completion for the given messages using the specified model. parameters: [] @@ -125,8 +125,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: Describe a chat completion by its ID. - description: Describe a chat completion by its ID. + summary: Get chat completion. + description: >- + Get chat completion. + + Describe a chat completion by its ID. 
parameters: - name: completion_id in: path @@ -156,10 +159,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: >- - Generate an OpenAI-compatible completion for the given prompt using the specified - model. + summary: Create completion. description: >- + Create completion. + Generate an OpenAI-compatible completion for the given prompt using the specified model. parameters: [] @@ -606,10 +609,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inference - summary: >- - Generate OpenAI-compatible embeddings for the given input using the specified - model. + summary: Create embeddings. description: >- + Create embeddings. + Generate OpenAI-compatible embeddings for the given input using the specified model. parameters: [] @@ -642,9 +645,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: >- - Returns a list of files that belong to the user's organization. + summary: List files. description: >- + List files. + Returns a list of files that belong to the user's organization. parameters: - name: after @@ -702,11 +706,13 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: >- - Upload a file that can be used across various endpoints. + summary: Upload file. description: >- + Upload file. + Upload a file that can be used across various endpoints. + The file upload should be a multipart form request with: - file: The File object (not file name) to be uploaded. @@ -755,9 +761,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: >- - Returns information about a specific file. + summary: Retrieve file. description: >- + Retrieve file. + Returns information about a specific file. parameters: - name: file_id @@ -789,8 +796,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: Delete a file. - description: Delete a file. + summary: Delete file. + description: Delete file. parameters: - name: file_id in: path @@ -822,9 +829,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Files - summary: >- - Returns the contents of the specified file. + summary: Retrieve file content. description: >- + Retrieve file content. + Returns the contents of the specified file. parameters: - name: file_id @@ -857,9 +865,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inspect - summary: >- - Get the current health status of the service. + summary: Get health status. description: >- + Get health status. + Get the current health status of the service. parameters: [] deprecated: false @@ -885,9 +894,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inspect - summary: >- - List all available API routes with their methods and implementing providers. + summary: List routes. description: >- + List routes. + List all available API routes with their methods and implementing providers. parameters: [] deprecated: false @@ -936,8 +946,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Register a model. - description: Register a model. + summary: Register model. + description: >- + Register model. + + Register a model. parameters: [] requestBody: content: @@ -967,8 +980,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Get a model by its identifier. - description: Get a model by its identifier. + summary: Get model. + description: >- + Get model. + + Get a model by its identifier. 
parameters: - name: model_id in: path @@ -993,8 +1009,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Models - summary: Unregister a model. - description: Unregister a model. + summary: Unregister model. + description: >- + Unregister model. + + Unregister a model. parameters: - name: model_id in: path @@ -1025,9 +1044,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Safety - summary: >- - Classifies if text and/or image inputs are potentially harmful. + summary: Create moderation. description: >- + Create moderation. + Classifies if text and/or image inputs are potentially harmful. parameters: [] requestBody: @@ -1083,8 +1103,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Prompts - summary: Create a new prompt. - description: Create a new prompt. + summary: Create prompt. + description: >- + Create prompt. + + Create a new prompt. parameters: [] requestBody: content: @@ -1114,9 +1137,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Prompts - summary: >- - Get a prompt by its identifier and optional version. + summary: Get prompt. description: >- + Get prompt. + Get a prompt by its identifier and optional version. parameters: - name: prompt_id @@ -1154,9 +1178,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Prompts - summary: >- - Update an existing prompt (increments version). + summary: Update prompt. description: >- + Update prompt. + Update an existing prompt (increments version). parameters: - name: prompt_id @@ -1188,8 +1213,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Prompts - summary: Delete a prompt. - description: Delete a prompt. + summary: Delete prompt. + description: >- + Delete prompt. + + Delete a prompt. parameters: - name: prompt_id in: path @@ -1220,9 +1248,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Prompts - summary: >- - Set which version of a prompt should be the default in get_prompt (latest). + summary: Set prompt version. description: >- + Set prompt version. + Set which version of a prompt should be the default in get_prompt (latest). parameters: - name: prompt_id @@ -1260,8 +1289,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Prompts - summary: List all versions of a specific prompt. - description: List all versions of a specific prompt. + summary: List prompt versions. + description: >- + List prompt versions. + + List all versions of a specific prompt. parameters: - name: prompt_id in: path @@ -1293,8 +1325,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Providers - summary: List all available providers. - description: List all available providers. + summary: List providers. + description: >- + List providers. + + List all available providers. parameters: [] deprecated: false /v1/providers/{provider_id}: @@ -1319,9 +1354,10 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Providers - summary: >- - Get detailed information about a specific provider. + summary: Get provider. description: >- + Get provider. + Get detailed information about a specific provider. parameters: - name: provider_id @@ -1352,8 +1388,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: List all OpenAI responses. - description: List all OpenAI responses. + summary: List all responses. + description: List all responses. parameters: - name: after in: query @@ -1404,8 +1440,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: Create a new OpenAI response. 
- description: Create a new OpenAI response. + summary: Create a model response. + description: Create a model response. parameters: [] requestBody: content: @@ -1447,8 +1483,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: Retrieve an OpenAI response by its ID. - description: Retrieve an OpenAI response by its ID. + summary: Get a model response. + description: Get a model response. parameters: - name: response_id in: path @@ -1478,8 +1514,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: Delete an OpenAI response by its ID. - description: Delete an OpenAI response by its ID. + summary: Delete a response. + description: Delete a response. parameters: - name: response_id in: path @@ -1509,10 +1545,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Agents - summary: >- - List input items for a given OpenAI response. - description: >- - List input items for a given OpenAI response. + summary: List input items. + description: List input items. parameters: - name: response_id in: path @@ -1581,8 +1615,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Safety - summary: Run a shield. - description: Run a shield. + summary: Run shield. + description: >- + Run shield. + + Run a shield. parameters: [] requestBody: content: @@ -3138,8 +3175,11 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Inspect - summary: Get the version of the service. - description: Get the version of the service. + summary: Get version. + description: >- + Get version. + + Get the version of the service. parameters: [] deprecated: false /v1beta/datasetio/append-rows/{dataset_id}: @@ -13795,9 +13835,16 @@ tags: x-displayName: >- Llama Stack Evaluation API for running evaluations on model and agent candidates. - name: Files - description: '' + description: >- + This API is used to upload documents that can be used with other Llama Stack + APIs. + x-displayName: Files - name: Inference description: >- + Llama Stack Inference API for generating completions, chat completions, and + embeddings. + + This API provides the raw interface to the underlying models. Two kinds of models are supported: @@ -13805,25 +13852,27 @@ tags: - Embedding models: these models generate embeddings to be used for semantic search. - x-displayName: >- - Llama Stack Inference API for generating completions, chat completions, and - embeddings. + x-displayName: Inference - name: Inspect - description: '' + description: >- + APIs for inspecting the Llama Stack service, including health status, available + API routes with methods and implementing providers. + x-displayName: Inspect - name: Models description: '' - name: PostTraining (Coming Soon) description: '' - name: Prompts - description: '' - x-displayName: >- + description: >- Protocol for prompt management operations. + x-displayName: Prompts - name: Providers - description: '' - x-displayName: >- + description: >- Providers API for inspecting, listing, and modifying providers and their configurations. + x-displayName: Providers - name: Safety - description: '' + description: OpenAI-compatible Moderations API. 
+ x-displayName: Safety - name: Scoring description: '' - name: ScoringFunctions diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index cdf47308e..5983b5c45 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -797,7 +797,7 @@ class Agents(Protocol): self, response_id: str, ) -> OpenAIResponseObject: - """Retrieve an OpenAI response by its ID. + """Get a model response. :param response_id: The ID of the OpenAI response to retrieve. :returns: An OpenAIResponseObject. @@ -826,7 +826,7 @@ class Agents(Protocol): ), ] = None, ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]: - """Create a new OpenAI response. + """Create a model response. :param input: Input message(s) to create the response. :param model: The underlying LLM used for completions. @@ -846,7 +846,7 @@ class Agents(Protocol): model: str | None = None, order: Order | None = Order.desc, ) -> ListOpenAIResponseObject: - """List all OpenAI responses. + """List all responses. :param after: The ID of the last response to return. :param limit: The number of responses to return. @@ -869,7 +869,7 @@ class Agents(Protocol): limit: int | None = 20, order: Order | None = Order.desc, ) -> ListOpenAIResponseInputItem: - """List input items for a given OpenAI response. + """List input items. :param response_id: The ID of the response to retrieve input items for. :param after: An item ID to list items after, used for pagination. @@ -884,7 +884,7 @@ class Agents(Protocol): @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True) @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1) async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject: - """Delete an OpenAI response by its ID. + """Delete a response. :param response_id: The ID of the OpenAI response to delete. :returns: An OpenAIDeleteResponseObject diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index 13f0e95fa..f1d3764db 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -104,6 +104,11 @@ class OpenAIFileDeleteResponse(BaseModel): @runtime_checkable @trace_protocol class Files(Protocol): + """Files + + This API is used to upload documents that can be used with other Llama Stack APIs. + """ + # OpenAI Files API Endpoints @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1) @@ -113,7 +118,8 @@ class Files(Protocol): purpose: Annotated[OpenAIFilePurpose, Form()], expires_after: Annotated[ExpiresAfter | None, Form()] = None, ) -> OpenAIFileObject: - """ + """Upload file. + Upload a file that can be used across various endpoints. The file upload should be a multipart form request with: @@ -137,7 +143,8 @@ class Files(Protocol): order: Order | None = Order.desc, purpose: OpenAIFilePurpose | None = None, ) -> ListOpenAIFileResponse: - """ + """List files. + Returns a list of files that belong to the user's organization. :param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list. @@ -154,7 +161,8 @@ class Files(Protocol): self, file_id: str, ) -> OpenAIFileObject: - """ + """Retrieve file. 
+ Returns information about a specific file. :param file_id: The ID of the file to use for this request. @@ -168,8 +176,7 @@ class Files(Protocol): self, file_id: str, ) -> OpenAIFileDeleteResponse: - """ - Delete a file. + """Delete file. :param file_id: The ID of the file to use for this request. :returns: An OpenAIFileDeleteResponse indicating successful deletion. @@ -182,7 +189,8 @@ class Files(Protocol): self, file_id: str, ) -> Response: - """ + """Retrieve file content. + Returns the contents of the specified file. :param file_id: The ID of the file to use for this request. diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index e88a16315..62a988ea6 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1053,7 +1053,9 @@ class InferenceProvider(Protocol): # for fill-in-the-middle type completion suffix: str | None = None, ) -> OpenAICompletion: - """Generate an OpenAI-compatible completion for the given prompt using the specified model. + """Create completion. + + Generate an OpenAI-compatible completion for the given prompt using the specified model. :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. :param prompt: The prompt to generate a completion for. @@ -1105,7 +1107,9 @@ class InferenceProvider(Protocol): top_p: float | None = None, user: str | None = None, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - """Generate an OpenAI-compatible chat completion for the given messages using the specified model. + """Create chat completions. + + Generate an OpenAI-compatible chat completion for the given messages using the specified model. :param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. :param messages: List of messages in the conversation. @@ -1144,7 +1148,9 @@ class InferenceProvider(Protocol): dimensions: int | None = None, user: str | None = None, ) -> OpenAIEmbeddingsResponse: - """Generate OpenAI-compatible embeddings for the given input using the specified model. + """Create embeddings. + + Generate OpenAI-compatible embeddings for the given input using the specified model. :param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint. :param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings. @@ -1157,7 +1163,9 @@ class InferenceProvider(Protocol): class Inference(InferenceProvider): - """Llama Stack Inference API for generating completions, chat completions, and embeddings. + """Inference + + Llama Stack Inference API for generating completions, chat completions, and embeddings. This API provides the raw interface to the underlying models. Two kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. @@ -1173,7 +1181,7 @@ class Inference(InferenceProvider): model: str | None = None, order: Order | None = Order.desc, ) -> ListOpenAIChatCompletionResponse: - """List all chat completions. + """List chat completions. :param after: The ID of the last chat completion to return. :param limit: The maximum number of chat completions to return. 
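# The endpoints documented above are drop-in OpenAI-compatible, so a stock
# OpenAI client pointed at a Llama Stack server can exercise them directly.
# A minimal sketch -- the base URL, api_key, and model IDs below are
# illustrative assumptions, not values taken from this patch:
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# "Create chat completions." (openai_chat_completion)
chat = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",  # assumed registered model id
    messages=[{"role": "user", "content": "Say hello."}],
)
print(chat.choices[0].message.content)

# "Create embeddings." (openai_embeddings)
emb = client.embeddings.create(
    model="all-MiniLM-L6-v2",  # assumed embedding model id
    input="hello world",
)
print(len(emb.data[0].embedding))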
@@ -1188,7 +1196,9 @@ class Inference(InferenceProvider): ) @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1) async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages: - """Describe a chat completion by its ID. + """Get chat completion. + + Describe a chat completion by its ID. :param completion_id: ID of the chat completion. :returns: A OpenAICompletionWithInputMessages. diff --git a/llama_stack/apis/inspect/inspect.py b/llama_stack/apis/inspect/inspect.py index e859dbe59..72f203621 100644 --- a/llama_stack/apis/inspect/inspect.py +++ b/llama_stack/apis/inspect/inspect.py @@ -58,9 +58,16 @@ class ListRoutesResponse(BaseModel): @runtime_checkable class Inspect(Protocol): + """Inspect + + APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers. + """ + @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1) async def list_routes(self) -> ListRoutesResponse: - """List all available API routes with their methods and implementing providers. + """List routes. + + List all available API routes with their methods and implementing providers. :returns: Response containing information about all available routes. """ @@ -68,7 +75,9 @@ class Inspect(Protocol): @webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1) async def health(self) -> HealthInfo: - """Get the current health status of the service. + """Get health status. + + Get the current health status of the service. :returns: Health information indicating if the service is operational. """ @@ -76,7 +85,9 @@ class Inspect(Protocol): @webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1) async def version(self) -> VersionInfo: - """Get the version of the service. + """Get version. + + Get the version of the service. :returns: Version information containing the service version number. """ diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 210ed9246..10949cb95 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -124,7 +124,9 @@ class Models(Protocol): self, model_id: str, ) -> Model: - """Get a model by its identifier. + """Get model. + + Get a model by its identifier. :param model_id: The identifier of the model to get. :returns: A Model. @@ -140,7 +142,9 @@ class Models(Protocol): metadata: dict[str, Any] | None = None, model_type: ModelType | None = None, ) -> Model: - """Register a model. + """Register model. + + Register a model. :param model_id: The identifier of the model to register. :param provider_model_id: The identifier of the model in the provider. @@ -156,7 +160,9 @@ class Models(Protocol): self, model_id: str, ) -> None: - """Unregister a model. + """Unregister model. + + Unregister a model. :param model_id: The identifier of the model to unregister. 
""" diff --git a/llama_stack/apis/prompts/prompts.py b/llama_stack/apis/prompts/prompts.py index c56185e25..b39c363c7 100644 --- a/llama_stack/apis/prompts/prompts.py +++ b/llama_stack/apis/prompts/prompts.py @@ -94,7 +94,9 @@ class ListPromptsResponse(BaseModel): @runtime_checkable @trace_protocol class Prompts(Protocol): - """Protocol for prompt management operations.""" + """Prompts + + Protocol for prompt management operations.""" @webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1) async def list_prompts(self) -> ListPromptsResponse: @@ -109,7 +111,9 @@ class Prompts(Protocol): self, prompt_id: str, ) -> ListPromptsResponse: - """List all versions of a specific prompt. + """List prompt versions. + + List all versions of a specific prompt. :param prompt_id: The identifier of the prompt to list versions for. :returns: A ListPromptsResponse containing all versions of the prompt. @@ -122,7 +126,9 @@ class Prompts(Protocol): prompt_id: str, version: int | None = None, ) -> Prompt: - """Get a prompt by its identifier and optional version. + """Get prompt. + + Get a prompt by its identifier and optional version. :param prompt_id: The identifier of the prompt to get. :param version: The version of the prompt to get (defaults to latest). @@ -136,7 +142,9 @@ class Prompts(Protocol): prompt: str, variables: list[str] | None = None, ) -> Prompt: - """Create a new prompt. + """Create prompt. + + Create a new prompt. :param prompt: The prompt text content with variable placeholders. :param variables: List of variable names that can be used in the prompt template. @@ -153,7 +161,9 @@ class Prompts(Protocol): variables: list[str] | None = None, set_as_default: bool = True, ) -> Prompt: - """Update an existing prompt (increments version). + """Update prompt. + + Update an existing prompt (increments version). :param prompt_id: The identifier of the prompt to update. :param prompt: The updated prompt text content. @@ -169,7 +179,9 @@ class Prompts(Protocol): self, prompt_id: str, ) -> None: - """Delete a prompt. + """Delete prompt. + + Delete a prompt. :param prompt_id: The identifier of the prompt to delete. """ @@ -181,7 +193,9 @@ class Prompts(Protocol): prompt_id: str, version: int, ) -> Prompt: - """Set which version of a prompt should be the default in get_prompt (latest). + """Set prompt version. + + Set which version of a prompt should be the default in get_prompt (latest). :param prompt_id: The identifier of the prompt. :param version: The version to set as default. diff --git a/llama_stack/apis/providers/providers.py b/llama_stack/apis/providers/providers.py index d1cff0f6c..e1872571d 100644 --- a/llama_stack/apis/providers/providers.py +++ b/llama_stack/apis/providers/providers.py @@ -42,13 +42,16 @@ class ListProvidersResponse(BaseModel): @runtime_checkable class Providers(Protocol): - """ + """Providers + Providers API for inspecting, listing, and modifying providers and their configurations. """ @webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1) async def list_providers(self) -> ListProvidersResponse: - """List all available providers. + """List providers. + + List all available providers. :returns: A ListProvidersResponse containing information about all providers. """ @@ -56,7 +59,9 @@ class Providers(Protocol): @webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1) async def inspect_provider(self, provider_id: str) -> ProviderInfo: - """Get detailed information about a specific provider. + """Get provider. 
+ + Get detailed information about a specific provider. :param provider_id: The ID of the provider to inspect. :returns: A ProviderInfo object containing the provider's details. diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py index 0fa250d90..2ae74b0a7 100644 --- a/llama_stack/apis/safety/safety.py +++ b/llama_stack/apis/safety/safety.py @@ -96,6 +96,11 @@ class ShieldStore(Protocol): @runtime_checkable @trace_protocol class Safety(Protocol): + """Safety + + OpenAI-compatible Moderations API. + """ + shield_store: ShieldStore @webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1) @@ -105,7 +110,9 @@ class Safety(Protocol): messages: list[Message], params: dict[str, Any], ) -> RunShieldResponse: - """Run a shield. + """Run shield. + + Run a shield. :param shield_id: The identifier of the shield to run. :param messages: The messages to run the shield on. @@ -117,7 +124,9 @@ class Safety(Protocol): @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1) async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject: - """Classifies if text and/or image inputs are potentially harmful. + """Create moderation. + + Classifies if text and/or image inputs are potentially harmful. :param input: Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models. :param model: The content moderation model you would like to use. From 696fefbf17491d52c9a5acfd71ff91eec415c0cc Mon Sep 17 00:00:00 2001 From: ehhuang Date: Mon, 6 Oct 2025 12:16:26 -0700 Subject: [PATCH 05/14] chore: logger category fix (#3706) # What does this PR do? WARNING 2025-10-06 12:01:45,137 root:266 uncategorized: Unknown logging category: tokenizer_utils. Falling back to default 'root' level: 20 ## Test Plan --- llama_stack/models/llama/tokenizer_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/models/llama/tokenizer_utils.py b/llama_stack/models/llama/tokenizer_utils.py index 9830bb61b..05da410a1 100644 --- a/llama_stack/models/llama/tokenizer_utils.py +++ b/llama_stack/models/llama/tokenizer_utils.py @@ -9,7 +9,7 @@ from pathlib import Path from llama_stack.log import get_logger -logger = get_logger(__name__, "tokenizer_utils") +logger = get_logger(__name__, "models") def load_bpe_file(model_path: Path) -> dict[bytes, int]: From 597d405e1303fd8e716ec2c8b2bb7855d69c6093 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Mon, 6 Oct 2025 14:44:01 -0700 Subject: [PATCH 06/14] chore: fix closing error (#3709) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Gets rid of this error message below (disclaimer: not sure why, but it does). 
ERROR 2025-10-06 12:04:22,837 asyncio:118 uncategorized: Task exception was never retrieved
future: <…> exception=RuntimeError('unable to perform operation on <…>; the handler is closed')
Traceback (most recent call last):
  /Users/erichuang/projects/llama-stack-git2/.venv/lib/python3.12/site-packages/httpx/_client.py:1985 in aclose
      await self._transport.aclose()
  /Users/erichuang/projects/llama-stack-git2/.venv/lib/python3.12/site-packages/httpx/_transports/default.py:406 in aclose
      await self._pool.aclose()
  /Users/erichuang/projects/llama-stack-git2/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py:353 in aclose
      await self._close_connections(closing_connections)
  /Users/erichuang/projects/llama-stack-git2/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py:345 in _close_connections
      await connection.aclose()
  /Users/erichuang/projects/llama-stack-git2/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py:173 in aclose
      await self._connection.aclose()
  /Users/erichuang/projects/llama-stack-git2/.venv/lib/python3.12/site-packages/httpcore/_async/http11.py:258 in aclose
      await self._network_stream.aclose()
  /Users/erichuang/projects/llama-stack-git2/.venv/lib/python3.12/site-packages/httpcore/_backends/anyio.py:53 in aclose
      await self._stream.aclose()
  /Users/erichuang/projects/llama-stack-git2/.venv/lib/python3.12/site-packages/anyio/streams/tls.py:216 in aclose
      await self.transport_stream.aclose()
  /Users/erichuang/projects/llama-stack-git2/.venv/lib/python3.12/site-packages/anyio/_backends/_asyncio.py:1310 in aclose
      self._transport.write_eof()
  in uvloop.loop.UVStream.write_eof:703
  in uvloop.loop.UVHandle._ensure_alive:159
RuntimeError: unable to perform operation on <…>; the handler is closed

## Test Plan
Run
uv run --with llama-stack llama stack build --distro=starter --image-type=venv --run
No more error.
---
 llama_stack/providers/utils/inference/openai_mixin.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/llama_stack/providers/utils/inference/openai_mixin.py
index acca73800..9137013ee 100644
--- a/llama_stack/providers/utils/inference/openai_mixin.py
+++ b/llama_stack/providers/utils/inference/openai_mixin.py
@@ -132,7 +132,10 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):

         :return: An iterable of model IDs or None if not implemented
         """
-        return [m.id async for m in self.client.models.list()]
+        client = self.client
+        async with client:
+            model_ids = [m.id async for m in client.models.list()]
+        return model_ids

     async def initialize(self) -> None:
         """

From bba9957eddb350e46714f142718fac32602f2a18 Mon Sep 17 00:00:00 2001
From: slekkala1
Date: Mon, 6 Oct 2025 16:58:22 -0700
Subject: [PATCH 07/14] feat(api): Add vector store file batches api (#3642)

# What does this PR do?
Add an OpenAI-compatible vector store file batches API. This functionality is
needed to attach many files to a vector store as a batch.
https://github.com/llamastack/llama-stack/issues/3533

API stubs have already been merged:
https://github.com/llamastack/llama-stack/pull/3615
Adds persistence for file batches, as discussed in
https://github.com/llamastack/llama-stack/pull/3544
(Generated with Claude Code and reviewed by me)

## Test Plan
1. Unit tests pass
2. Also verified that the cc-vec integration with LlamaStackClient works with
   the file batches API: https://github.com/raghotham/cc-vec
3.
Integration tests pass --- llama_stack/core/routing_tables/vector_dbs.py | 62 ++ .../providers/inline/vector_io/faiss/faiss.py | 4 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 4 +- .../remote/vector_io/chroma/chroma.py | 3 +- .../remote/vector_io/milvus/milvus.py | 4 +- .../remote/vector_io/pgvector/pgvector.py | 4 +- .../remote/vector_io/qdrant/qdrant.py | 6 +- .../remote/vector_io/weaviate/weaviate.py | 4 +- .../utils/memory/openai_vector_store_mixin.py | 438 +++++++++- ...54792b9f22d2cb4522eab802810be8672d3dc.json | 23 +- ...5af4cccef848e79440e4298528185efe45ed4.json | 807 ++++++++++++++++++ ...0b4ab85adbbe4903193947ce19260447e8619.json | 807 ++++++++++++++++++ ...eb0f2d1dbd7e7f55f4783caf3e7f768b007e9.json | 423 +++++++++ ...7b932ab697ffaa1cc79a7caf46b404677fb31.json | 423 +++++++++ ...7beaa67a3824a6cd5d7a0e21c8e587ea03980.json | 807 ++++++++++++++++++ ...2d5759b33d997f7b1305fc20ae7f7c50faa26.json | 423 +++++++++ ...2030f2bd2ee609d672e9f44275c601800b144.json | 423 +++++++++ ...2576f78baaac506fc2c69bb14079201783605.json | 807 ++++++++++++++++++ ...34a95f56931b792d5939f4cebc57-44869b1b.json | 34 + ...34a95f56931b792d5939f4cebc57-79be7c70.json | 25 + ...34a95f56931b792d5939f4cebc57-7becc84f.json | 70 ++ ...34a95f56931b792d5939f4cebc57-e3b0c442.json | 15 + ...34a95f56931b792d5939f4cebc57-e8733dec.json | 25 + ...f93f2ea6ed882f1186cf4fdda5bb-d5d684a3.json | 528 ++++++++++++ .../models-7d9446738fd7-d5d684a3.json | 527 ++++++++++++ .../models-bd032f995f2a-7becc84f.json | 69 ++ ...7b1a0700b6c74372d556c873dda39c603d844.json | 423 +++++++++ ...9ae8f46ac7086afb48820a36c41a3cb994cb9.json | 423 +++++++++ ...d4baa865eda04d9d92e418a7e58da7be1bc2b.json | 423 +++++++++ ...c1c82f2602f796edcdbf8c9813a5a3a82825b.json | 39 + ...77757f7abbd584a52b47259fb0a903922eec0.json | 423 +++++++++ ...710b364496c1ee21a75258205830e1df7221b.json | 423 +++++++++ ...747c9983b0d40cc83fcfd90c5e733ecfb5a35.json | 39 + ...d0f2756377d5ed83818898fd4c4c67df8ade6.json | 423 +++++++++ ...536f34da3ce7e80eba86bec16d231aa347d00.json | 20 + .../vector_io/test_openai_vector_stores.py | 287 +++++++ .../test_vector_io_openai_vector_stores.py | 687 ++++++++++++++- 37 files changed, 10322 insertions(+), 53 deletions(-) create mode 100644 tests/integration/common/recordings/8b6244e7be7e4d03874b13df9cb5af4cccef848e79440e4298528185efe45ed4.json create mode 100644 tests/integration/common/recordings/9ca52f6470a742d637b9da12ff00b4ab85adbbe4903193947ce19260447e8619.json create mode 100644 tests/integration/common/recordings/aa45f61f2d277765422722394dbeb0f2d1dbd7e7f55f4783caf3e7f768b007e9.json create mode 100644 tests/integration/common/recordings/d48ba62fab4e243d368ec42e5497b932ab697ffaa1cc79a7caf46b404677fb31.json create mode 100644 tests/integration/common/recordings/e297006956fc1fb184d0bbaa79f7beaa67a3824a6cd5d7a0e21c8e587ea03980.json create mode 100644 tests/integration/common/recordings/f4b0cf7f241feb7ff68414545a42d5759b33d997f7b1305fc20ae7f7c50faa26.json create mode 100644 tests/integration/common/recordings/f7a80fae588892aa7031ac972c12030f2bd2ee609d672e9f44275c601800b144.json create mode 100644 tests/integration/common/recordings/ff568685962ecba61ca6c2811cb2576f78baaac506fc2c69bb14079201783605.json create mode 100644 tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-44869b1b.json create mode 100644 tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-79be7c70.json create mode 100644 
tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-7becc84f.json create mode 100644 tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-e3b0c442.json create mode 100644 tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-e8733dec.json create mode 100644 tests/integration/common/recordings/models-bd3df37825f32706c88677a327960bfa47dcf93f2ea6ed882f1186cf4fdda5bb-d5d684a3.json create mode 100644 tests/integration/recordings/responses/models-7d9446738fd7-d5d684a3.json create mode 100644 tests/integration/recordings/responses/models-bd032f995f2a-7becc84f.json create mode 100644 tests/integration/vector_io/recordings/0fbf282a067bb1fe2c9fa5c96287b1a0700b6c74372d556c873dda39c603d844.json create mode 100644 tests/integration/vector_io/recordings/4b8ce5031f00e754bbb6e1f55109ae8f46ac7086afb48820a36c41a3cb994cb9.json create mode 100644 tests/integration/vector_io/recordings/506216767e53ce1a6ef47637a97d4baa865eda04d9d92e418a7e58da7be1bc2b.json create mode 100644 tests/integration/vector_io/recordings/54f7bec4d7073965af5f612d096c1c82f2602f796edcdbf8c9813a5a3a82825b.json create mode 100644 tests/integration/vector_io/recordings/8158c78a51cf32f35b849dd054077757f7abbd584a52b47259fb0a903922eec0.json create mode 100644 tests/integration/vector_io/recordings/943a7db9bab0934c95417e8befe710b364496c1ee21a75258205830e1df7221b.json create mode 100644 tests/integration/vector_io/recordings/995712d2e4441339fdd8ca21d87747c9983b0d40cc83fcfd90c5e733ecfb5a35.json create mode 100644 tests/integration/vector_io/recordings/da1e7b0f80936e70deaa09b6678d0f2756377d5ed83818898fd4c4c67df8ade6.json create mode 100644 tests/integration/vector_io/recordings/ffea5475c2625b87e302ec419cc536f34da3ce7e80eba86bec16d231aa347d00.json diff --git a/llama_stack/core/routing_tables/vector_dbs.py b/llama_stack/core/routing_tables/vector_dbs.py index 497894064..932bbdba8 100644 --- a/llama_stack/core/routing_tables/vector_dbs.py +++ b/llama_stack/core/routing_tables/vector_dbs.py @@ -245,3 +245,65 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs): vector_store_id=vector_store_id, file_id=file_id, ) + + async def openai_create_vector_store_file_batch( + self, + vector_store_id: str, + file_ids: list[str], + attributes: dict[str, Any] | None = None, + chunking_strategy: Any | None = None, + ): + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_create_vector_store_file_batch( + vector_store_id=vector_store_id, + file_ids=file_ids, + attributes=attributes, + chunking_strategy=chunking_strategy, + ) + + async def openai_retrieve_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ): + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + ) + + async def openai_list_files_in_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + after: str | None = None, + before: str | None = None, + filter: str | None = None, + limit: int | None = 20, + order: str | None = "desc", + ): + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await 
provider.openai_list_files_in_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + after=after, + before=before, + filter=filter, + limit=limit, + order=order, + ) + + async def openai_cancel_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ): + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_cancel_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=vector_store_id, + ) diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index 258c6e7aa..405c134e5 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -200,12 +200,10 @@ class FaissIndex(EmbeddingIndex): class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: + super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.files_api = files_api self.cache: dict[str, VectorDBWithIndex] = {} - self.kvstore: KVStore | None = None - self.openai_vector_stores: dict[str, dict[str, Any]] = {} async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.kvstore) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index f34f8f6fb..26231a9b7 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -410,12 +410,10 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc """ def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None: + super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.files_api = files_api self.cache: dict[str, VectorDBWithIndex] = {} - self.openai_vector_stores: dict[str, dict[str, Any]] = {} - self.kvstore: KVStore | None = None async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.kvstore) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index a9ec644ef..511123d6e 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -140,14 +140,13 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP inference_api: Api.inference, files_api: Files | None, ) -> None: + super().__init__(files_api=files_api, kvstore=None) log.info(f"Initializing ChromaVectorIOAdapter with url: {config}") self.config = config self.inference_api = inference_api self.client = None self.cache = {} - self.kvstore: KVStore | None = None self.vector_db_store = None - self.files_api = files_api async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.kvstore) diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index e07e8ff12..0acc90595 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -309,14 +309,12 @@ class 
MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP inference_api: Inference, files_api: Files | None, ) -> None: + super().__init__(files_api=files_api, kvstore=None) self.config = config self.cache = {} self.client = None self.inference_api = inference_api - self.files_api = files_api - self.kvstore: KVStore | None = None self.vector_db_store = None - self.openai_vector_stores: dict[str, dict[str, Any]] = {} self.metadata_collection_name = "openai_vector_stores_metadata" async def initialize(self) -> None: diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 1c140e782..dfdfef6eb 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -345,14 +345,12 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco inference_api: Api.inference, files_api: Files | None = None, ) -> None: + super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api self.conn = None self.cache = {} - self.files_api = files_api - self.kvstore: KVStore | None = None self.vector_db_store = None - self.openai_vector_stores: dict[str, dict[str, Any]] = {} self.metadata_collection_name = "openai_vector_stores_metadata" async def initialize(self) -> None: diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index ec3869495..6b386840c 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -27,7 +27,7 @@ from llama_stack.apis.vector_io import ( from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig -from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl +from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ( ChunkForDeletion, @@ -162,14 +162,12 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP inference_api: Api.inference, files_api: Files | None = None, ) -> None: + super().__init__(files_api=files_api, kvstore=None) self.config = config self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api - self.files_api = files_api self.vector_db_store = None - self.kvstore: KVStore | None = None - self.openai_vector_stores: dict[str, dict[str, Any]] = {} self._qdrant_lock = asyncio.Lock() async def initialize(self) -> None: diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 02d132106..54ac6f8d3 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -284,14 +284,12 @@ class WeaviateVectorIOAdapter( inference_api: Api.inference, files_api: Files | None, ) -> None: + super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api self.client_cache = {} self.cache = {} - self.files_api = files_api - self.kvstore: KVStore | None = None self.vector_db_store = None - self.openai_vector_stores: 
dict[str, dict[str, Any]] = {} self.metadata_collection_name = "openai_vector_stores_metadata" def _get_client(self) -> weaviate.WeaviateClient: diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 36432767f..0d0aa25a4 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -12,6 +12,8 @@ import uuid from abc import ABC, abstractmethod from typing import Any +from pydantic import TypeAdapter + from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files, OpenAIFileObject from llama_stack.apis.vector_dbs import VectorDB @@ -50,12 +52,16 @@ logger = get_logger(name=__name__, category="providers::utils") # Constants for OpenAI vector stores CHUNK_MULTIPLIER = 5 +FILE_BATCH_CLEANUP_INTERVAL_SECONDS = 24 * 60 * 60 # 1 day in seconds +MAX_CONCURRENT_FILES_PER_BATCH = 3 # Maximum concurrent file processing within a batch +FILE_BATCH_CHUNK_SIZE = 10 # Process files in chunks of this size VERSION = "v3" VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::" OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:{VERSION}::" +OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX = f"openai_vector_stores_file_batches:{VERSION}::" class OpenAIVectorStoreMixin(ABC): @@ -65,11 +71,15 @@ class OpenAIVectorStoreMixin(ABC): an openai_vector_stores in-memory cache. """ - # These should be provided by the implementing class - openai_vector_stores: dict[str, dict[str, Any]] - files_api: Files | None - # KV store for persisting OpenAI vector store metadata - kvstore: KVStore | None + # Implementing classes should call super().__init__() in their __init__ method + # to properly initialize the mixin attributes. 
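# A sketch of that subclass pattern, mirroring the provider changes elsewhere
# in this patch (e.g. FaissVectorIOAdapter); the adapter name is hypothetical:
#
#     class MyVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO):
#         def __init__(self, config, inference_api, files_api=None):
#             super().__init__(files_api=files_api, kvstore=None)
#             self.config = config
#             self.inference_api = inference_api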
+ def __init__(self, files_api: Files | None = None, kvstore: KVStore | None = None): + self.openai_vector_stores: dict[str, dict[str, Any]] = {} + self.openai_file_batches: dict[str, dict[str, Any]] = {} + self.files_api = files_api + self.kvstore = kvstore + self._last_file_batch_cleanup_time = 0 + self._file_batch_tasks: dict[str, asyncio.Task[None]] = {} async def _save_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None: """Save vector store metadata to persistent storage.""" @@ -159,9 +169,129 @@ class OpenAIVectorStoreMixin(ABC): for idx in range(len(raw_items)): await self.kvstore.delete(f"{contents_prefix}{idx}") + async def _save_openai_vector_store_file_batch(self, batch_id: str, batch_info: dict[str, Any]) -> None: + """Save file batch metadata to persistent storage.""" + assert self.kvstore + key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}{batch_id}" + await self.kvstore.set(key=key, value=json.dumps(batch_info)) + # update in-memory cache + self.openai_file_batches[batch_id] = batch_info + + async def _load_openai_vector_store_file_batches(self) -> dict[str, dict[str, Any]]: + """Load all file batch metadata from persistent storage.""" + assert self.kvstore + start_key = OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX + end_key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}\xff" + stored_data = await self.kvstore.values_in_range(start_key, end_key) + + batches: dict[str, dict[str, Any]] = {} + for item in stored_data: + info = json.loads(item) + batches[info["id"]] = info + return batches + + async def _delete_openai_vector_store_file_batch(self, batch_id: str) -> None: + """Delete file batch metadata from persistent storage and in-memory cache.""" + assert self.kvstore + key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}{batch_id}" + await self.kvstore.delete(key) + # remove from in-memory cache + self.openai_file_batches.pop(batch_id, None) + + async def _cleanup_expired_file_batches(self) -> None: + """Clean up expired file batches from persistent storage.""" + assert self.kvstore + start_key = OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX + end_key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}\xff" + stored_data = await self.kvstore.values_in_range(start_key, end_key) + + current_time = int(time.time()) + expired_count = 0 + + for item in stored_data: + info = json.loads(item) + expires_at = info.get("expires_at") + if expires_at and current_time > expires_at: + logger.info(f"Cleaning up expired file batch: {info['id']}") + await self.kvstore.delete(f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}{info['id']}") + # Remove from in-memory cache if present + self.openai_file_batches.pop(info["id"], None) + expired_count += 1 + + if expired_count > 0: + logger.info(f"Cleaned up {expired_count} expired file batches") + + async def _get_completed_files_in_batch(self, vector_store_id: str, file_ids: list[str]) -> set[str]: + """Determine which files in a batch are actually completed by checking vector store file_ids.""" + if vector_store_id not in self.openai_vector_stores: + return set() + + store_info = self.openai_vector_stores[vector_store_id] + completed_files = set(file_ids) & set(store_info["file_ids"]) + return completed_files + + async def _analyze_batch_completion_on_resume(self, batch_id: str, batch_info: dict[str, Any]) -> list[str]: + """Analyze batch completion status and return remaining files to process. + + Returns: + List of file IDs that still need processing. Empty list if batch is complete. 
+ """ + vector_store_id = batch_info["vector_store_id"] + all_file_ids = batch_info["file_ids"] + + # Find files that are actually completed + completed_files = await self._get_completed_files_in_batch(vector_store_id, all_file_ids) + remaining_files = [file_id for file_id in all_file_ids if file_id not in completed_files] + + completed_count = len(completed_files) + total_count = len(all_file_ids) + remaining_count = len(remaining_files) + + # Update file counts to reflect actual state + batch_info["file_counts"] = { + "completed": completed_count, + "failed": 0, # We don't track failed files during resume - they'll be retried + "in_progress": remaining_count, + "cancelled": 0, + "total": total_count, + } + + # If all files are already completed, mark batch as completed + if remaining_count == 0: + batch_info["status"] = "completed" + logger.info(f"Batch {batch_id} is already fully completed, updating status") + + # Save updated batch info + await self._save_openai_vector_store_file_batch(batch_id, batch_info) + + return remaining_files + + async def _resume_incomplete_batches(self) -> None: + """Resume processing of incomplete file batches after server restart.""" + for batch_id, batch_info in self.openai_file_batches.items(): + if batch_info["status"] == "in_progress": + logger.info(f"Analyzing incomplete file batch: {batch_id}") + + remaining_files = await self._analyze_batch_completion_on_resume(batch_id, batch_info) + + # Check if batch is now completed after analysis + if batch_info["status"] == "completed": + continue + + if remaining_files: + logger.info(f"Resuming batch {batch_id} with {len(remaining_files)} remaining files") + # Restart the background processing task with only remaining files + task = asyncio.create_task(self._process_file_batch_async(batch_id, batch_info, remaining_files)) + self._file_batch_tasks[batch_id] = task + async def initialize_openai_vector_stores(self) -> None: - """Load existing OpenAI vector stores into the in-memory cache.""" + """Load existing OpenAI vector stores and file batches into the in-memory cache.""" self.openai_vector_stores = await self._load_openai_vector_stores() + self.openai_file_batches = await self._load_openai_vector_store_file_batches() + self._file_batch_tasks = {} + # TODO: Resume only works for single worker deployment. Jobs with multiple workers will need to be handled differently. 
+ await self._resume_incomplete_batches() + self._last_file_batch_cleanup_time = 0 @abstractmethod async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: @@ -570,6 +700,14 @@ class OpenAIVectorStoreMixin(ABC): if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) + # Check if file is already attached to this vector store + store_info = self.openai_vector_stores[vector_store_id] + if file_id in store_info["file_ids"]: + logger.warning(f"File {file_id} is already attached to vector store {vector_store_id}, skipping") + # Return existing file object + file_info = await self._load_openai_vector_store_file(vector_store_id, file_id) + return VectorStoreFileObject(**file_info) + attributes = attributes or {} chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto() created_at = int(time.time()) @@ -615,7 +753,6 @@ class OpenAIVectorStoreMixin(ABC): chunk_overlap_tokens, attributes, ) - if not chunks: vector_store_file_object.status = "failed" vector_store_file_object.last_error = VectorStoreFileLastError( @@ -828,7 +965,230 @@ class OpenAIVectorStoreMixin(ABC): chunking_strategy: VectorStoreChunkingStrategy | None = None, ) -> VectorStoreFileBatchObject: """Create a vector store file batch.""" - raise NotImplementedError("openai_create_vector_store_file_batch is not implemented yet") + if vector_store_id not in self.openai_vector_stores: + raise VectorStoreNotFoundError(vector_store_id) + + chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto() + + created_at = int(time.time()) + batch_id = f"batch_{uuid.uuid4()}" + # File batches expire after 7 days + expires_at = created_at + (7 * 24 * 60 * 60) + + # Initialize batch file counts - all files start as in_progress + file_counts = VectorStoreFileCounts( + completed=0, + cancelled=0, + failed=0, + in_progress=len(file_ids), + total=len(file_ids), + ) + + # Create batch object immediately with in_progress status + batch_object = VectorStoreFileBatchObject( + id=batch_id, + created_at=created_at, + vector_store_id=vector_store_id, + status="in_progress", + file_counts=file_counts, + ) + + batch_info = { + **batch_object.model_dump(), + "file_ids": file_ids, + "attributes": attributes, + "chunking_strategy": chunking_strategy.model_dump(), + "expires_at": expires_at, + } + await self._save_openai_vector_store_file_batch(batch_id, batch_info) + + # Start background processing of files + task = asyncio.create_task(self._process_file_batch_async(batch_id, batch_info)) + self._file_batch_tasks[batch_id] = task + + # Run cleanup if needed (throttled to once every 1 day) + current_time = int(time.time()) + if current_time - self._last_file_batch_cleanup_time >= FILE_BATCH_CLEANUP_INTERVAL_SECONDS: + logger.info("Running throttled cleanup of expired file batches") + asyncio.create_task(self._cleanup_expired_file_batches()) + self._last_file_batch_cleanup_time = current_time + + return batch_object + + async def _process_files_with_concurrency( + self, + file_ids: list[str], + vector_store_id: str, + attributes: dict[str, Any], + chunking_strategy_obj: Any, + batch_id: str, + batch_info: dict[str, Any], + ) -> None: + """Process files with controlled concurrency and chunking.""" + semaphore = asyncio.Semaphore(MAX_CONCURRENT_FILES_PER_BATCH) + + async def process_single_file(file_id: str) -> tuple[str, bool]: + """Process a single file with concurrency control.""" + async with semaphore: + try: + vector_store_file_object = await 
self.openai_attach_file_to_vector_store( + vector_store_id=vector_store_id, + file_id=file_id, + attributes=attributes, + chunking_strategy=chunking_strategy_obj, + ) + return file_id, vector_store_file_object.status == "completed" + except Exception as e: + logger.error(f"Failed to process file {file_id} in batch {batch_id}: {e}") + return file_id, False + + # Process files in chunks to avoid creating too many tasks at once + total_files = len(file_ids) + for chunk_start in range(0, total_files, FILE_BATCH_CHUNK_SIZE): + chunk_end = min(chunk_start + FILE_BATCH_CHUNK_SIZE, total_files) + chunk = file_ids[chunk_start:chunk_end] + + chunk_num = chunk_start // FILE_BATCH_CHUNK_SIZE + 1 + total_chunks = (total_files + FILE_BATCH_CHUNK_SIZE - 1) // FILE_BATCH_CHUNK_SIZE + logger.info( + f"Processing chunk {chunk_num} of {total_chunks} ({len(chunk)} files, {chunk_start + 1}-{chunk_end} of {total_files} total files)" + ) + + async with asyncio.TaskGroup() as tg: + chunk_tasks = [tg.create_task(process_single_file(file_id)) for file_id in chunk] + + chunk_results = [task.result() for task in chunk_tasks] + + # Update counts after each chunk for progressive feedback + for _, success in chunk_results: + self._update_file_counts(batch_info, success=success) + + # Save progress after each chunk + await self._save_openai_vector_store_file_batch(batch_id, batch_info) + + def _update_file_counts(self, batch_info: dict[str, Any], success: bool) -> None: + """Update file counts based on processing result.""" + if success: + batch_info["file_counts"]["completed"] += 1 + else: + batch_info["file_counts"]["failed"] += 1 + batch_info["file_counts"]["in_progress"] -= 1 + + def _update_batch_status(self, batch_info: dict[str, Any]) -> None: + """Update final batch status based on file processing results.""" + if batch_info["file_counts"]["failed"] == 0: + batch_info["status"] = "completed" + elif batch_info["file_counts"]["completed"] == 0: + batch_info["status"] = "failed" + else: + batch_info["status"] = "completed" # Partial success counts as completed + + async def _process_file_batch_async( + self, + batch_id: str, + batch_info: dict[str, Any], + override_file_ids: list[str] | None = None, + ) -> None: + """Process files in a batch asynchronously in the background.""" + file_ids = override_file_ids if override_file_ids is not None else batch_info["file_ids"] + attributes = batch_info["attributes"] + chunking_strategy = batch_info["chunking_strategy"] + vector_store_id = batch_info["vector_store_id"] + chunking_strategy_adapter: TypeAdapter[VectorStoreChunkingStrategy] = TypeAdapter(VectorStoreChunkingStrategy) + chunking_strategy_obj = chunking_strategy_adapter.validate_python(chunking_strategy) + + try: + # Process all files with controlled concurrency + await self._process_files_with_concurrency( + file_ids=file_ids, + vector_store_id=vector_store_id, + attributes=attributes, + chunking_strategy_obj=chunking_strategy_obj, + batch_id=batch_id, + batch_info=batch_info, + ) + + # Update final batch status + self._update_batch_status(batch_info) + await self._save_openai_vector_store_file_batch(batch_id, batch_info) + + logger.info(f"File batch {batch_id} processing completed with status: {batch_info['status']}") + + except asyncio.CancelledError: + logger.info(f"File batch {batch_id} processing was cancelled") + # Clean up task reference if it still exists + self._file_batch_tasks.pop(batch_id, None) + raise # Re-raise to ensure proper cancellation propagation + finally: + # Always clean up task reference 
when processing ends + self._file_batch_tasks.pop(batch_id, None) + + def _get_and_validate_batch(self, batch_id: str, vector_store_id: str) -> dict[str, Any]: + """Get and validate batch exists and belongs to vector store.""" + if vector_store_id not in self.openai_vector_stores: + raise VectorStoreNotFoundError(vector_store_id) + + if batch_id not in self.openai_file_batches: + raise ValueError(f"File batch {batch_id} not found") + + batch_info = self.openai_file_batches[batch_id] + + # Check if batch has expired (read-only check) + expires_at = batch_info.get("expires_at") + if expires_at: + current_time = int(time.time()) + if current_time > expires_at: + raise ValueError(f"File batch {batch_id} has expired after 7 days from creation") + + if batch_info["vector_store_id"] != vector_store_id: + raise ValueError(f"File batch {batch_id} does not belong to vector store {vector_store_id}") + + return batch_info + + def _paginate_objects( + self, + objects: list[Any], + limit: int | None = 20, + after: str | None = None, + before: str | None = None, + ) -> tuple[list[Any], bool, str | None, str | None]: + """Apply pagination to a list of objects with id fields.""" + limit = min(limit or 20, 100) # Cap at 100 as per OpenAI + + # Find start index + start_idx = 0 + if after: + for i, obj in enumerate(objects): + if obj.id == after: + start_idx = i + 1 + break + + # Find end index + end_idx = start_idx + limit + if before: + for i, obj in enumerate(objects[start_idx:], start_idx): + if obj.id == before: + end_idx = i + break + + # Apply pagination + paginated_objects = objects[start_idx:end_idx] + + # Determine pagination info + has_more = end_idx < len(objects) + first_id = paginated_objects[0].id if paginated_objects else None + last_id = paginated_objects[-1].id if paginated_objects else None + + return paginated_objects, has_more, first_id, last_id + + async def openai_retrieve_vector_store_file_batch( + self, + batch_id: str, + vector_store_id: str, + ) -> VectorStoreFileBatchObject: + """Retrieve a vector store file batch.""" + batch_info = self._get_and_validate_batch(batch_id, vector_store_id) + return VectorStoreFileBatchObject(**batch_info) async def openai_list_files_in_vector_store_file_batch( self, @@ -841,15 +1201,39 @@ class OpenAIVectorStoreMixin(ABC): order: str | None = "desc", ) -> VectorStoreFilesListInBatchResponse: """Returns a list of vector store files in a batch.""" - raise NotImplementedError("openai_list_files_in_vector_store_file_batch is not implemented yet") + batch_info = self._get_and_validate_batch(batch_id, vector_store_id) + batch_file_ids = batch_info["file_ids"] - async def openai_retrieve_vector_store_file_batch( - self, - batch_id: str, - vector_store_id: str, - ) -> VectorStoreFileBatchObject: - """Retrieve a vector store file batch.""" - raise NotImplementedError("openai_retrieve_vector_store_file_batch is not implemented yet") + # Load file objects for files in this batch + batch_file_objects = [] + + for file_id in batch_file_ids: + try: + file_info = await self._load_openai_vector_store_file(vector_store_id, file_id) + file_object = VectorStoreFileObject(**file_info) + + # Apply status filter if provided + if filter and file_object.status != filter: + continue + + batch_file_objects.append(file_object) + except Exception as e: + logger.warning(f"Could not load file {file_id} from batch {batch_id}: {e}") + continue + + # Sort by created_at + reverse_order = order == "desc" + batch_file_objects.sort(key=lambda x: x.created_at, reverse=reverse_order) + + 
# Apply pagination using helper + paginated_files, has_more, first_id, last_id = self._paginate_objects(batch_file_objects, limit, after, before) + + return VectorStoreFilesListInBatchResponse( + data=paginated_files, + first_id=first_id, + last_id=last_id, + has_more=has_more, + ) async def openai_cancel_vector_store_file_batch( self, @@ -857,4 +1241,24 @@ class OpenAIVectorStoreMixin(ABC): vector_store_id: str, ) -> VectorStoreFileBatchObject: """Cancel a vector store file batch.""" - raise NotImplementedError("openai_cancel_vector_store_file_batch is not implemented yet") + batch_info = self._get_and_validate_batch(batch_id, vector_store_id) + + if batch_info["status"] not in ["in_progress"]: + raise ValueError(f"Cannot cancel batch {batch_id} with status {batch_info['status']}") + + # Cancel the actual processing task if it exists + if batch_id in self._file_batch_tasks: + task = self._file_batch_tasks[batch_id] + if not task.done(): + task.cancel() + logger.info(f"Cancelled processing task for file batch: {batch_id}") + # Remove from task tracking + del self._file_batch_tasks[batch_id] + + batch_info["status"] = "cancelled" + + await self._save_openai_vector_store_file_batch(batch_id, batch_info) + + updated_batch = VectorStoreFileBatchObject(**batch_info) + + return updated_batch diff --git a/tests/integration/common/recordings/02c93bb3c314427bae2b7a7a6f054792b9f22d2cb4522eab802810be8672d3dc.json b/tests/integration/common/recordings/02c93bb3c314427bae2b7a7a6f054792b9f22d2cb4522eab802810be8672d3dc.json index 0d77df1f0..43b951749 100644 --- a/tests/integration/common/recordings/02c93bb3c314427bae2b7a7a6f054792b9f22d2cb4522eab802810be8672d3dc.json +++ b/tests/integration/common/recordings/02c93bb3c314427bae2b7a7a6f054792b9f22d2cb4522eab802810be8672d3dc.json @@ -14,23 +14,22 @@ "__data__": { "models": [ { - "model": "llama3.2:3b", - "name": "llama3.2:3b", - "digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72", - "expires_at": "2025-10-04T12:20:09.202126-07:00", - "size": 3367856128, - "size_vram": 3367856128, + "model": "all-minilm:l6-v2", + "name": "all-minilm:l6-v2", + "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef", + "expires_at": "2025-10-06T16:41:45.231544-07:00", + "size": 590204928, + "size_vram": 590204928, "details": { "parent_model": "", "format": "gguf", - "family": "llama", + "family": "bert", "families": [ - "llama" + "bert" ], - "parameter_size": "3.2B", - "quantization_level": "Q4_K_M" - }, - "context_length": 4096 + "parameter_size": "23M", + "quantization_level": "F16" + } } ] } diff --git a/tests/integration/common/recordings/8b6244e7be7e4d03874b13df9cb5af4cccef848e79440e4298528185efe45ed4.json b/tests/integration/common/recordings/8b6244e7be7e4d03874b13df9cb5af4cccef848e79440e4298528185efe45ed4.json new file mode 100644 index 000000000..1647f511e --- /dev/null +++ b/tests/integration/common/recordings/8b6244e7be7e4d03874b13df9cb5af4cccef848e79440e4298528185efe45ed4.json @@ -0,0 +1,807 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "nomic-embed-text:latest", + "input": [ + "This is the content of test file 2" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "nomic-embed-text:latest" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + 0.036525182, + -0.0072787926, + 
-0.15320797, + -0.028591355, + 0.028115708, + -0.0033384967, + 0.021494914, + -0.023697548, + -0.059428893, + -0.04412936, + -0.014445912, + 0.06520278, + 0.013848802, + -0.029918822, + -0.022794332, + -0.012578859, + 0.060358867, + -0.031223036, + -0.012306958, + -0.028883344, + -0.014677056, + -0.024171423, + -0.047258105, + -0.019668069, + 0.10096786, + 0.042677063, + -0.012945782, + 0.05772575, + -0.09481949, + -0.013522372, + 0.058091108, + -0.035321448, + 0.02014728, + -0.06760144, + -0.012323442, + -0.045392025, + 0.04685687, + 0.024927035, + -0.0017673819, + 0.036423087, + -0.020881223, + -0.010788712, + -0.01838111, + -0.007801951, + -0.011164214, + -0.022797823, + -0.01222212, + 0.05638416, + -0.01662934, + -0.04117776, + 0.004534807, + 0.019233605, + -0.008680182, + 0.03177389, + 0.06082287, + -0.010224672, + -0.018689552, + -0.019074611, + 0.029412521, + -0.06990004, + 0.054043073, + 0.027053045, + -0.049923293, + 0.058975294, + 0.0018301148, + -0.06718531, + -0.044889167, + 0.032761537, + -0.022604113, + 0.043496683, + 0.08500273, + 0.008184364, + 0.0011824819, + -0.0417156, + -0.015855035, + -0.06935983, + 0.01533393, + -0.03297617, + -0.043794934, + 0.008973833, + 0.0415081, + 0.037018586, + 0.004035694, + 0.0067175985, + 0.058073524, + -0.033033613, + -0.049569633, + -0.011724681, + -0.0049699075, + 0.04405061, + 0.02349984, + 0.049434863, + 0.05952279, + 0.007926657, + -0.019564645, + 0.028824113, + 0.030559592, + 0.044332445, + -0.03705847, + 0.013914022, + -0.01584405, + 0.012503536, + 0.039434463, + 0.020927113, + 0.02458832, + 0.033364173, + -0.0013068066, + 0.025197528, + -0.05292493, + 0.010358094, + -0.018871995, + 0.039280638, + -0.048534855, + 0.004642058, + 0.011491514, + -0.036328327, + 0.0637683, + -0.0360448, + -0.04317744, + 0.03721341, + 0.009880729, + -0.032810695, + 0.012197031, + 0.06644975, + 0.04497407, + 0.0018043267, + -0.076117076, + 0.0028520897, + 0.025521474, + -0.04780887, + -0.015784036, + -0.004914463, + -0.0003810333, + -0.008213055, + -0.0040868036, + 0.0026211734, + 0.005037653, + -0.0054035867, + -0.054472372, + -0.04214955, + -0.036636207, + 0.005277914, + 0.025802922, + 0.054448027, + 0.010910778, + -0.019098252, + 0.06248315, + 0.019785397, + -0.02148464, + -0.023303429, + 0.0124828555, + -0.02455258, + 0.0053893207, + 0.006655952, + 0.020618292, + -0.05195774, + 0.001724354, + -0.049451906, + 0.031900283, + 0.08541784, + 0.02900063, + 0.006208959, + -0.009606019, + 0.0030572556, + -0.018463623, + 0.014401457, + 0.0007510511, + 0.08289015, + 0.062720075, + -0.010840198, + -0.04971401, + -0.038808372, + 0.0044536674, + 0.011472072, + -0.031167375, + -0.031224154, + 0.011706744, + -0.022990009, + 0.04747808, + -0.0016337503, + 0.015181135, + 0.005154193, + 0.00949444, + 0.042812645, + 0.001253686, + -0.050080713, + 0.038098942, + -0.014367589, + -0.043111958, + -0.0059632747, + -0.022602718, + -0.0042201183, + -0.09451348, + -0.042164654, + -0.010821582, + -0.04681359, + 0.016275495, + -0.0033313567, + 0.027538816, + -0.019907625, + 0.00040033093, + -0.030824887, + -0.058938056, + 0.0014922265, + -0.027667042, + 0.015573365, + -0.04173136, + -0.015453809, + -0.01595607, + 0.03898053, + 0.043484144, + 0.0075124763, + -0.0025220348, + 0.038111743, + 0.041447856, + -0.011153068, + -0.01717726, + -0.045249123, + -0.010734678, + -0.03552057, + 0.033035237, + -0.0077519426, + 0.048082184, + -0.06981011, + 0.034551185, + 0.011257734, + -0.043801457, + -0.018373946, + -0.04797999, + -0.017102923, + 0.0029698398, + -0.09975526, + 0.00053959514, + 
0.0074329274, + -0.018584883, + -0.0094977375, + -0.05056549, + 0.08929669, + 0.011828429, + 0.040005405, + -0.03369542, + 0.07867971, + 0.025032107, + 0.016890414, + 0.014425124, + 0.00064274436, + 0.009868133, + -0.034772366, + 0.05254746, + 0.071544185, + -0.01852601, + -0.0013607002, + 0.010325862, + 0.0647086, + 0.013452749, + -0.009807788, + -0.01738053, + -0.012833702, + -0.0037767375, + -0.046967912, + 0.017845146, + -0.0682881, + 0.011557345, + 0.01458601, + -0.048856564, + -0.01423403, + -0.03424404, + 0.021640293, + -0.025939032, + -0.001273354, + 0.0033471219, + 0.02255794, + -0.05386608, + 0.02134696, + 0.012213072, + -0.027799206, + 0.041816894, + 0.013318655, + -0.027756989, + 0.03054267, + -0.025455547, + 0.014977695, + 0.03629763, + 0.05029929, + 0.017317088, + 0.0008021539, + -0.027486524, + 0.0011794426, + 0.021061994, + 0.038059466, + 0.014114616, + 0.014319938, + 0.012650396, + 0.04102732, + 0.018222608, + 0.0115328785, + 0.043359082, + -0.0028082337, + -0.016893078, + -0.03791571, + 0.023969462, + 0.0077467947, + 0.033167463, + 0.018768141, + 0.00804635, + -0.05316497, + 0.021600094, + -0.032088757, + 0.056640208, + 0.010592809, + -0.06282453, + -0.003963599, + -0.0054780785, + 0.0057015507, + -0.026736109, + 0.03140229, + 0.021742998, + 0.037487593, + 0.04916904, + -0.015454876, + 0.0036427178, + -0.06809397, + -0.005600329, + 0.006426826, + 0.029163402, + 0.008698685, + 0.013447198, + 0.028116653, + -0.032959465, + -0.046715226, + 0.062885955, + 0.07805104, + -0.075704284, + -0.026722923, + 0.031568483, + 0.029869428, + 0.014207811, + 0.058283728, + -0.0009454238, + 0.049990628, + 0.09433687, + 0.011483032, + 0.0073822956, + 0.001765557, + 0.014384013, + -0.0805711, + -0.057262138, + 0.0033087756, + 0.017576102, + 0.050261848, + -0.0058530914, + -0.00258757, + 0.009722071, + 0.0044941446, + 0.009631424, + 0.027689122, + 0.012394503, + -0.04055002, + 0.055514883, + -0.028808117, + 0.0297643, + -0.034311485, + 0.021378465, + -0.033280674, + 0.019441161, + -0.009369208, + 0.0030489776, + -0.016572703, + 0.042294934, + 0.015723946, + 0.0022674324, + -0.0014906601, + 0.01840701, + 0.059862193, + 0.053135127, + 0.020754104, + -0.06374346, + 0.001787633, + -0.036681958, + 0.03553359, + 0.06609074, + -0.0107706385, + 0.045129295, + 0.06838197, + 0.025984539, + -0.06558362, + 0.027897354, + -0.00621841, + 0.03920637, + 0.009362378, + -0.062093496, + 0.021269219, + -0.06091154, + -0.027098468, + 0.008638457, + -0.050488967, + 0.04693317, + 0.043328438, + -0.025587596, + 0.03407469, + -0.048816204, + -0.004734613, + -0.0008902356, + 0.024133636, + -0.022534605, + 0.035635088, + -0.053277653, + -0.055609506, + 0.0523981, + 0.0014473854, + 0.032570753, + -0.005762427, + -0.016173452, + -0.06672014, + 0.0013724786, + 0.007844828, + 0.02429992, + 0.0032019925, + 0.0016553001, + -0.022802994, + 0.001800882, + 0.032480165, + -0.002195562, + -0.03154405, + -0.013679192, + -0.011184489, + 0.033688888, + 0.04774288, + 0.0018061483, + -0.09035719, + -0.0047670994, + -0.02052915, + 0.036272082, + 0.020193182, + -0.036813166, + 0.039460275, + -0.015967365, + -0.0033895948, + -0.031828586, + 0.053221144, + 0.021549668, + -0.07595095, + -0.044737455, + -0.010761814, + 0.0025511624, + 0.14498504, + 0.08222001, + -0.037528154, + -0.032176156, + 0.013683398, + 0.01410672, + 0.019557275, + 0.062485218, + 0.027925756, + 0.079192385, + -0.026622739, + 0.02323037, + -0.016175434, + -0.032527965, + -0.008870566, + -0.009013046, + -0.009945577, + 0.025208296, + 0.0073141777, + 0.044331536, + 
-0.020921648, + -0.016868133, + -0.026842397, + 0.03165012, + 0.043120645, + -0.048179835, + -0.05591947, + 0.029399967, + -0.069223806, + 0.03508237, + 0.00804212, + -0.041150257, + 0.008898182, + 0.0006015489, + 0.023109462, + 0.027766718, + 0.012039964, + -0.030886615, + -0.030401329, + 0.038484607, + -0.0247026, + 0.0018090954, + 0.028525416, + 0.054761168, + -0.0062592058, + 0.029739086, + 0.033199638, + 0.0488184, + 0.028191078, + -0.020734766, + 0.00060847827, + 0.029920708, + -0.0490555, + 0.007290553, + 0.0026984178, + 0.063341014, + 0.018249765, + 0.019682994, + 0.0063302247, + -0.029094942, + -0.030193835, + 0.042414594, + -0.05859321, + -0.09094711, + -0.025345713, + -0.034041878, + -0.014829038, + 0.0030920506, + 0.015670862, + 0.073476, + 0.017715238, + 0.052982714, + 0.012198469, + -0.021962965, + 0.017349334, + 0.025136312, + 0.006353252, + 0.03436416, + -0.01633907, + -0.08311436, + 0.04788054, + 0.0032672018, + -0.0318856, + 0.06784985, + 0.072452076, + 0.009116457, + 0.017004106, + -0.040795434, + -0.023130566, + -0.0017866351, + -0.020753238, + -0.028738804, + 0.0031001552, + -0.012533389, + 0.047431413, + -0.059432007, + -0.019904893, + 0.009464013, + -0.016388606, + 0.028543858, + -0.026128467, + -0.03368374, + -0.0040021804, + 3.1505784e-05, + -0.10005339, + 0.020524276, + -0.06320255, + -0.026909621, + -0.009929203, + 0.03084924, + -0.041759893, + 0.02034976, + -0.008311877, + -0.0042031757, + -0.04709363, + 0.030620687, + -0.028947143, + -0.007556809, + 0.01617724, + 0.037857477, + -0.039480377, + -0.008805032, + 0.051410846, + 0.017079966, + 0.0032464731, + 0.023022559, + -0.017350538, + 0.03471975, + -0.02863222, + -0.024592673, + -0.0077179587, + 0.03141146, + 0.03583118, + -0.0130302245, + -0.057425633, + 0.040003538, + -0.0046423534, + 0.019725544, + 0.0397109, + -0.0025461344, + 0.046675395, + 0.011516851, + -0.029444098, + 0.03419632, + -0.043872464, + -0.021072017, + -0.010389852, + 0.01248914, + -0.03476949, + 0.02083105, + -0.021170666, + -0.010824939, + -0.034223318, + 0.0008804664, + -0.00975538, + -0.004145119, + 0.0062736045, + 0.017810361, + -0.05057402, + 0.0028876425, + -0.012459405, + 0.024415256, + -0.009684934, + -0.032268245, + -1.0135974e-05, + 0.015377202, + -0.008089165, + -0.08534785, + 0.011209079, + -0.006432232, + -0.05970185, + 0.03646468, + -0.024002092, + -0.022855703, + -0.051673587, + 0.038473092, + -0.028756764, + 0.041329525, + -0.06377881, + -0.014500157, + -0.018372798, + -0.008677442, + 0.036858637, + 0.038448237, + 0.044321943, + -0.046770208, + 0.026638264, + -0.04069364, + -0.051563717, + -0.054425545, + -0.007966239, + -0.045169767, + -0.0006122694, + 0.013411372, + 0.04263278, + 0.03749003, + 0.010722818, + -0.041889716, + -0.036726084, + 0.014166507, + 0.038341004, + 0.004509263, + 0.035988707, + 0.02634235, + -0.02256134, + 0.08171513, + 0.09104147, + 0.06757358, + -0.0016213343, + -0.018941583, + -0.0014519675, + 0.03409365, + -0.060576558, + -0.028001321, + -0.08352477, + 0.011629786, + 0.014637305, + -0.021191692, + 0.009192876, + 0.0025693115, + 0.03831378, + -0.00035758872, + -0.032391928, + 0.006118005, + -0.05773841, + 0.033030152, + 0.03268179, + 0.031052263, + -0.0018795256, + -0.0463158, + 0.017675944, + 0.039604764, + 0.056545958, + -0.002072885, + -0.0374349, + -0.014934615, + -0.046360567, + 0.060439337, + -5.3795357e-05, + 0.027416907, + -0.08041611, + 0.00016825287, + -0.08668716, + -0.03210328, + 0.016515074, + -0.0062253834, + -0.00093463395, + -0.027180947, + -0.049670145, + -0.033094753, + 
-0.0051170597, + 0.031569846, + -0.014995825, + -0.016850019, + 0.04239559, + 0.020676404, + 0.0319851, + -0.008854607, + 0.04452473, + -0.023021534, + 0.007295005, + 0.05227394, + 0.0040576537, + -0.0655794, + -0.067981854, + 0.03440047, + 0.009278226, + -0.0282169, + 0.060756575, + -0.020904224, + 0.01505642, + -0.0045534745, + 0.018723203, + 0.0035253377, + 0.011872832, + 0.042355374, + 0.017724074, + -0.060881015, + 0.010464869, + -0.015852634, + -0.03751531, + 0.022855468, + -0.037866883, + 0.05328077, + -0.0320521, + -0.030350016, + -0.010912554, + -0.012704745, + 0.0076516517, + 0.0014142905, + 0.011725254, + 0.0067488, + -0.008221275, + 0.01648301, + -0.013712469, + 0.0129476935, + 0.028405288, + -0.011489568, + -0.006695754, + -0.07523671, + 0.0012562524, + -0.051538818, + 0.017514601, + 0.03280067, + -0.018965578, + 0.009017527, + -0.052108284, + 0.0017074383, + 0.00056099903, + 0.008343997, + -0.01674154, + -0.012425597, + -0.00041037227, + 0.1104793, + -0.015096156, + 0.014880369, + -0.0098567465, + 0.024937985, + 0.0112802675, + -0.0010737488, + -0.06354736, + -3.862344e-05, + -0.024247888 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "nomic-embed-text:latest", + "object": "list", + "usage": { + "prompt_tokens": 8, + "total_tokens": 8 + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/common/recordings/9ca52f6470a742d637b9da12ff00b4ab85adbbe4903193947ce19260447e8619.json b/tests/integration/common/recordings/9ca52f6470a742d637b9da12ff00b4ab85adbbe4903193947ce19260447e8619.json new file mode 100644 index 000000000..290635671 --- /dev/null +++ b/tests/integration/common/recordings/9ca52f6470a742d637b9da12ff00b4ab85adbbe4903193947ce19260447e8619.json @@ -0,0 +1,807 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "nomic-embed-text:latest", + "input": [ + "This is batch test file 1" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "nomic-embed-text:latest" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + 0.01183041, + -0.0065989625, + -0.159677, + 0.011660306, + 0.055617318, + -0.03764695, + 0.0163666, + 0.033777084, + -0.06433634, + -0.08037781, + -0.0057114926, + 0.07607082, + 0.033490222, + 0.048497792, + -0.048456103, + -0.049539, + 0.059783153, + -0.08439946, + 0.0076269372, + -0.0128732305, + 0.05902644, + 0.012931591, + -0.08323305, + -0.00037215627, + 0.13830419, + 0.024290211, + -0.047809705, + 0.039520696, + -0.06423598, + -0.01653946, + 0.03764018, + -0.001062501, + 0.028489634, + -0.025925444, + -0.015699588, + -0.012715725, + 0.017358005, + -0.007198467, + 0.059812553, + 0.028332362, + -0.00015984774, + 0.004483949, + 0.034580402, + -0.054280724, + -0.002989754, + 0.023461882, + 0.011839507, + 0.018908013, + 0.016710319, + 0.004905327, + -0.0107955905, + -0.01565778, + -0.04169478, + 0.02510759, + 0.026486792, + 0.01054831, + 0.011289881, + 0.038714606, + -0.0136384675, + -0.023249293, + 0.014086617, + 0.018654121, + -0.07146624, + 0.047506154, + -0.012085512, + -0.007589288, + -0.009515838, + 0.0048574316, + -0.004600554, + 0.0031499087, + 0.06778753, + -0.019641325, + 0.018102348, + -0.01726182, + -0.003802732, + -0.04414122, + -0.010491107, + -0.065158285, + -0.045328394, + -0.0019480857, + 0.052318677, + 0.0386049, + 0.020296056, + 0.044793047, + 0.08282699, + -0.019911101, + 
-0.016511027, + -0.0062321154, + -0.025036003, + 0.04578435, + 0.0019149093, + 0.025694296, + -0.0042011673, + -0.018107908, + -0.026668591, + 0.018340195, + 0.010810087, + 0.018672433, + -0.006774911, + -0.0026458725, + 0.023082372, + 0.027705511, + 0.019753877, + -0.03543464, + -0.0061461334, + 0.0155549655, + -0.019579103, + -0.00693201, + -0.06635246, + -0.015482261, + -0.0040295934, + 0.0006957319, + -0.008871345, + -0.00842857, + 0.031484608, + -0.010076284, + 0.06257018, + 0.0012318427, + -0.024530765, + 0.00015912329, + 0.0033331378, + -0.032083686, + -0.007399188, + 0.07031288, + 0.033552274, + 0.061820872, + -0.09171231, + 0.036374647, + 0.007984676, + -0.031679634, + 0.00598418, + -0.0029291043, + -0.0049730917, + -0.052057285, + 0.020125173, + 0.009004486, + -0.022456508, + -0.012051283, + -0.03740793, + -0.027594674, + -0.02012376, + 0.011664398, + 0.04336321, + 0.061720803, + 0.041055538, + -0.02444171, + 0.024476659, + 0.030615946, + -0.01689858, + 0.0091607245, + 0.0038629547, + -0.0019203863, + -0.0035829302, + 0.021674454, + 0.037874587, + -0.057554636, + 0.014823112, + 0.0036189007, + 0.012866306, + 0.051631145, + 0.0021970836, + -0.033981066, + -0.03782387, + 0.01235394, + -0.057634324, + -0.07556398, + 0.008977255, + 0.07841102, + 0.060794022, + -0.03463157, + -0.063551195, + -0.064811006, + 0.010709957, + -0.027145889, + -0.0837886, + -0.035913587, + 0.017231362, + -0.01455278, + 0.039031487, + -0.038145658, + 0.023733672, + -0.019787688, + 0.020164428, + 0.023367887, + 0.0035691075, + -0.028722964, + 0.014704597, + -0.019744202, + -0.06668101, + -0.017812628, + -0.009186517, + -0.033119973, + -0.085967295, + -0.080312125, + 0.013302178, + -0.061551016, + 0.017130975, + -0.017442413, + 0.04742156, + -0.013023663, + -0.013847287, + -0.01880652, + -0.07011646, + 0.018233122, + -0.030537246, + -0.026766777, + -0.012263141, + 0.014689888, + -0.049961388, + 0.03201573, + 0.015774516, + -0.020335846, + -0.038940914, + 0.0065977564, + 0.035997562, + -0.053227507, + 0.008883548, + -0.039375745, + -0.017865263, + 0.007343183, + 0.017375462, + 0.021595728, + 0.057712954, + -0.040693924, + -0.000778912, + -0.018082067, + -0.015103824, + -0.024191063, + -0.0077742958, + -0.034330968, + -0.020159615, + -0.03245423, + 0.0020437704, + -0.000114842755, + -0.029564297, + -0.018030599, + -0.0031425157, + 0.053831782, + -0.026106073, + 0.04243461, + -0.048363626, + 0.025711408, + -0.008338205, + 0.0009197218, + -0.011072695, + 0.00031293565, + 0.0033421176, + -0.007302082, + 0.04127773, + -0.0074836435, + -0.04299338, + -0.002760089, + 0.019094143, + 0.039009947, + 0.03581834, + -0.032022007, + -0.009045915, + -0.03275861, + 0.017295409, + -0.039618656, + 0.015396318, + -0.07593323, + 0.03475173, + 0.007710904, + -0.009037294, + -0.026630195, + -0.027383188, + 0.02212514, + -0.035001624, + -0.0219445, + -0.01212384, + -0.0018017493, + -0.011781174, + -0.051410057, + 0.026306989, + 0.006329408, + 0.010307703, + 0.01613663, + -0.006002573, + 0.031006144, + -0.036049806, + -0.018159281, + -0.012575659, + -0.0048318235, + 0.048996273, + -0.0010814993, + 0.050774954, + -0.027395276, + 0.0115728015, + 0.031056559, + 0.011177566, + 0.012006755, + -0.02556873, + 0.029484332, + -0.009657058, + 0.009322593, + 0.022122696, + -0.018415872, + 0.010098681, + -0.007367993, + -0.023805562, + 0.035959154, + 0.028602934, + 0.030718775, + 0.01705538, + -0.024984695, + 0.042858277, + -0.015449, + 0.005040281, + 0.038991883, + -0.07141338, + -0.002947093, + -0.044420503, + 0.019382862, + -0.040407836, + 
0.04245461, + 0.048940845, + 0.018063093, + 0.08591597, + -0.035389014, + -0.010674617, + -0.103511095, + -0.008537786, + 0.010264984, + -0.003966177, + 0.02314327, + 0.0048719845, + 0.06199085, + -0.00810136, + -0.039515182, + 0.05785542, + 0.06719427, + -0.039108012, + -0.050833326, + 0.05823837, + 0.017042343, + 0.005815163, + 0.039831843, + -0.012049576, + 0.076485425, + 0.012621482, + 0.06927575, + 0.05359866, + -0.015146923, + 0.044284295, + -0.062355984, + -0.009034613, + 0.04071826, + -0.01236521, + 0.079400524, + 0.0017920422, + -0.011480363, + 0.008711773, + 0.018180674, + -0.0030674522, + 0.0326583, + 0.03525443, + -0.02087537, + 0.05094025, + -0.0037492628, + 0.009178962, + -0.0050435406, + -0.01166052, + 0.0060158456, + -0.002493798, + 0.021641793, + 0.0019783853, + 0.023140313, + 0.046997964, + 0.0069999313, + -0.0552795, + -0.020092534, + 0.06467227, + 0.044829298, + 0.013295184, + 0.0377816, + -0.046331275, + 0.01770082, + -0.013348137, + 0.04617519, + 0.04468347, + -0.03253012, + 0.015447477, + 0.030224748, + -0.0013485672, + -0.03615717, + 0.008698818, + -0.0037734164, + 0.04494809, + 0.037184346, + -0.011223347, + 0.0046344185, + -0.07529732, + 0.025554653, + -0.015140733, + -0.0035430966, + 0.03661124, + 0.013250649, + -0.055586766, + 0.027562145, + -0.018204745, + -0.029428158, + -0.0029150618, + 0.03623637, + -0.022476854, + -0.0058649112, + -0.015735915, + -0.019995706, + 0.032269973, + 0.017872665, + 0.028031865, + -0.043758772, + -0.027188994, + -0.058870632, + 0.024894219, + 0.015318543, + 0.06244725, + 0.021922529, + 0.000678521, + -0.025339983, + 0.025911404, + 0.01583583, + -0.014407775, + -0.037194725, + -0.015699212, + 0.008184332, + 0.014927899, + 0.0737949, + 0.007748195, + -0.07158831, + -0.039901625, + 0.031431172, + 0.011147712, + 0.020828275, + -0.035193726, + 0.05613746, + -0.0022006142, + 0.008007006, + 0.001472366, + 0.019893395, + 0.044233263, + -0.02244468, + -0.0665883, + 0.013832251, + 0.0026457622, + 0.09737926, + 0.09575702, + -0.04908296, + -0.062802345, + -0.0095988205, + 0.008329187, + 0.041316554, + -0.0222064, + 0.02813126, + 0.07059441, + -0.02560012, + 0.044651207, + -0.027545268, + -0.007889025, + 0.03391235, + 0.008170332, + 0.0067786956, + 0.0615806, + 0.044006567, + 0.0056231483, + -0.024909342, + 0.040038925, + -0.037021257, + 0.0010181392, + 0.058034208, + -0.021651162, + -0.06021004, + 0.014830516, + -0.050770685, + 0.010422301, + 0.0016205559, + -0.03166853, + 0.014091049, + -0.002066098, + 0.02992549, + 0.013251145, + 0.011673487, + -0.0430954, + -0.048110887, + 0.01493126, + 0.006862025, + 0.04188833, + 0.011692501, + 0.0465231, + 0.010624, + 0.02873104, + 0.037793215, + 0.08978305, + 0.011727344, + 0.043248493, + -0.033803374, + 0.011249601, + -0.015437648, + -0.009372223, + -0.005403984, + -0.009915787, + -0.030847883, + -0.00076942804, + 0.018497106, + -0.00030310496, + -0.0076847905, + -0.0036222623, + -0.008554024, + -0.07606582, + -0.024716768, + -0.028077196, + -0.024249833, + 0.027158285, + 0.0075863106, + 0.09348848, + -0.00034073484, + 0.039915837, + -0.007647916, + -0.035295825, + 0.01611119, + 0.060429912, + 0.009458672, + 0.027763832, + -0.025683967, + -0.091306895, + 0.0367077, + 0.009893541, + -5.195292e-05, + 0.045459133, + 0.04671114, + -0.0023683042, + 0.017460158, + -0.007978136, + 0.00081788, + -0.009908127, + 0.0049076737, + -0.03604046, + 0.024152907, + 0.0022956813, + 0.061990347, + -0.061900347, + 0.0047628507, + 0.007954329, + -0.05227117, + 0.013897867, + -0.0034024485, + -0.06788996, + 0.036198605, + 
-0.014600589, + -0.038748026, + 0.031534728, + -0.037783317, + -0.057816587, + -0.054505207, + 0.010229355, + -0.01668772, + -0.013999046, + -0.049303915, + -0.013006012, + -0.020143948, + 0.0009209327, + 0.010504151, + 0.052313875, + -0.003835063, + 0.03984861, + -0.05403, + 0.004036369, + 0.035671517, + -0.009310839, + 0.01921996, + 0.015426655, + -0.042717084, + -0.016548151, + -0.03559785, + -0.03052737, + 0.0016032697, + 0.04009937, + 0.05516244, + -0.009645057, + -0.019377265, + 0.017122837, + 0.007185355, + 0.012066883, + 0.015954316, + -0.0029309995, + -0.008670052, + 0.0007600626, + -0.0019616315, + 0.03605449, + -0.028704248, + -0.057372347, + -0.03711266, + 0.02601168, + 0.020637576, + -0.014288832, + 0.023694387, + -0.018556923, + -0.003977263, + -0.03251488, + -0.04545843, + -0.027434839, + 0.013158248, + -0.005281848, + -0.03187363, + -0.022890532, + -0.0063330783, + 0.040277284, + 0.017638152, + -0.038472284, + 0.015346814, + 0.06673371, + -0.011651253, + -0.06683331, + 0.008377879, + -0.030951817, + -0.036013808, + 0.02394849, + 0.023321355, + 0.024521058, + -0.03078664, + 0.014595395, + -0.037766363, + 0.075227626, + -0.01933975, + 0.043791853, + -0.025162384, + -0.044860955, + 0.0059519857, + 0.04085485, + 0.06551164, + -0.05282273, + 0.0030225238, + -0.06850771, + -0.062015526, + -0.06011264, + 0.014174797, + -0.050894123, + 0.017077608, + 0.021088008, + 0.058029104, + 0.043224387, + -0.004394573, + -0.0022478225, + -0.006972821, + 0.02401093, + 0.022611097, + 8.550083e-05, + 0.056450296, + 0.055112243, + -0.034522895, + 0.06482398, + 0.08114595, + 0.022528961, + -0.013464262, + -0.0029874062, + 0.005515398, + 0.026176685, + -0.041392956, + -0.035894908, + -0.052102275, + 0.032556653, + -0.016931413, + -0.047386043, + 0.012574915, + 0.03802867, + 0.045309085, + 0.025728, + -0.02505067, + 0.039530423, + -0.065004446, + 0.017083768, + 0.0033854055, + 0.07688453, + -0.019878633, + -0.0025184979, + -0.0027949202, + 0.052868426, + 0.054179598, + -0.0040608337, + -0.0053128796, + -0.04103081, + -0.049691968, + 0.06014039, + 0.04751648, + 0.015087763, + -0.06859484, + 0.00805693, + -0.061754886, + 0.008819008, + -0.027785089, + -0.010586925, + -0.020496469, + -0.029158294, + -0.05417346, + -0.029509347, + -0.025456924, + 0.041056376, + 0.0075264946, + -0.018885529, + 0.07735419, + 0.00489195, + 0.050696895, + -0.0041886116, + 0.064080104, + -0.020775754, + -0.017177466, + 0.0023288913, + 0.010398866, + -0.040627487, + -0.034321204, + 0.016019996, + 0.028118521, + 0.014172112, + 0.08738979, + -0.03657629, + 0.018347794, + -0.03947416, + -0.01077611, + 0.00085160177, + 0.0368259, + 0.05611389, + 0.05134766, + -0.025541335, + -0.0057555106, + -0.013793745, + -0.05975066, + -0.0064870752, + -0.053716175, + 0.024583345, + -0.084030546, + -0.048775505, + -0.059886374, + -0.057641674, + 0.030222055, + 0.018706435, + 0.023170326, + -0.0064046904, + -0.018711446, + -0.0029956547, + 0.0377868, + -0.012569718, + 0.0514249, + -0.012415474, + -0.018657023, + -0.040379863, + 0.029388199, + -0.07378978, + 0.026212148, + 0.0056296797, + -0.00405927, + 0.021354636, + -0.0822599, + 0.01597725, + 0.07648158, + -0.006006045, + -0.014829594, + -0.021541826, + 0.0032610476, + 0.06906917, + -0.05802312, + -0.023113884, + -0.015534724, + 0.016758824, + 0.0030736707, + -0.0022294512, + -0.026804008, + -0.0031566115, + -0.0584943 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "nomic-embed-text:latest", + "object": "list", + "usage": { + "prompt_tokens": 6, + "total_tokens": 6 + } + } + 
}, + "is_streaming": false + } +} diff --git a/tests/integration/common/recordings/aa45f61f2d277765422722394dbeb0f2d1dbd7e7f55f4783caf3e7f768b007e9.json b/tests/integration/common/recordings/aa45f61f2d277765422722394dbeb0f2d1dbd7e7f55f4783caf3e7f768b007e9.json new file mode 100644 index 000000000..88a1568bf --- /dev/null +++ b/tests/integration/common/recordings/aa45f61f2d277765422722394dbeb0f2d1dbd7e7f55f4783caf3e7f768b007e9.json @@ -0,0 +1,423 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "all-minilm:l6-v2", + "input": [ + "This is the content of test file 1" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "all-minilm:l6-v2" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + -0.029406669, + 0.08920982, + -0.11326726, + 0.0065823817, + 0.07725067, + -0.036890104, + 0.030436223, + 0.041454185, + -0.049156666, + 0.018258564, + 0.14662577, + 0.01744915, + -0.012837422, + -0.06889876, + -0.039401636, + -0.038800705, + -0.08963421, + -0.059656583, + 0.001375945, + 0.045138627, + 0.042796962, + 0.053700265, + -0.035706885, + 0.010138017, + 0.060920056, + 0.017344126, + -0.05633907, + 0.063370295, + 0.0021257724, + -0.083796844, + 0.050487563, + 0.047987595, + 0.069071226, + 0.049588464, + 0.117036626, + 0.05339311, + 0.10129953, + -0.048230153, + -0.014987975, + 0.0250915, + 0.031392053, + -0.008863942, + 0.0073650074, + -0.0009767569, + -0.016403567, + 0.015523393, + -0.010998956, + -0.014870063, + 0.0061682137, + -0.0017961137, + -0.022682818, + 0.018210242, + -0.07757007, + -0.0015845516, + 0.069547005, + 0.000419109, + 0.038414054, + 0.005823485, + -0.028931383, + 0.07009549, + -0.0018009909, + 0.033516172, + -0.014593847, + 0.03922457, + 0.08240545, + -0.050596908, + -0.039732855, + -0.024425076, + -0.015055329, + -0.11705068, + -0.15979129, + -0.008256823, + -0.0100719705, + 0.03266482, + 0.0029998205, + 0.0316428, + -0.094554916, + 0.017661797, + 0.058996264, + -0.119718134, + -0.027414676, + -0.09155906, + 0.040038, + 0.01091849, + -0.029446004, + 0.10225186, + 0.06583262, + -0.003439552, + -0.009694834, + 0.016906522, + 0.023685955, + -0.032616187, + -0.010238839, + 0.07891618, + -0.007330681, + 0.05238444, + 0.00943625, + 0.042121, + 0.08491511, + 0.049208272, + -0.01868227, + -0.013585418, + 0.06727199, + 0.084571496, + -0.103213035, + -0.08387524, + 0.03641842, + -0.047227863, + 0.057315867, + -0.04463932, + 0.006783099, + -0.08934107, + -0.015040418, + -0.08107057, + 0.013285569, + -0.060907867, + -0.042128306, + 0.057306163, + -0.058711898, + 0.04628304, + 0.070194095, + -0.041729517, + -0.0338408, + -0.012369257, + -0.044708908, + -0.059450094, + 0.08251312, + -3.443368e-33, + 0.0121309515, + -0.11084454, + -0.020510655, + 0.10916455, + 0.033683147, + -0.02845083, + 0.024345158, + 0.034192592, + -0.08367815, + 0.0064610844, + -0.00912456, + -0.0663567, + -0.0028754657, + 0.008272698, + -0.09166764, + 0.0089771375, + -0.03963948, + 0.019947624, + -0.01321528, + -0.019034218, + 0.051933073, + 0.028107261, + -0.039153125, + -0.080395184, + -0.050503474, + 0.02060341, + -0.012718284, + -0.046732575, + 0.017907938, + -0.0028334607, + -0.011695137, + -0.05667005, + -0.043894444, + 0.034919597, + 0.022352098, + 0.046777196, + 0.045085873, + -0.008840106, + -0.06373453, + 0.036720857, + 0.012829601, + -0.035169926, + 0.046209145, + 
-0.014361767, + 0.03706697, + -0.056797564, + -0.06310496, + 0.010818958, + 0.047810175, + 0.0029118094, + -0.003235893, + 0.061511047, + 0.072056666, + -0.03286638, + 0.005070082, + 0.021947902, + -0.017779002, + -0.022738373, + -0.021926457, + 0.047074158, + 0.010847615, + 0.05539702, + -0.07119971, + 0.033833236, + 0.012342855, + -0.047586687, + -0.026776271, + -0.09885727, + 0.10053448, + 0.036877092, + -0.07049897, + -0.059692938, + 0.016129492, + -0.0016443401, + -0.026804024, + -0.013527272, + -0.015385511, + 0.055627547, + -0.060485132, + -0.055540122, + -0.04329072, + -0.07097361, + -0.04857043, + -0.03726256, + -0.09059366, + -0.036855534, + 0.024561211, + -0.10113953, + 0.056738112, + -0.10995085, + 0.042282794, + 0.014222368, + -0.07067843, + -0.05902307, + 0.06426122, + 1.6036318e-33, + 0.037851896, + 0.032911286, + -0.04029648, + -0.00049357174, + 0.028011942, + 0.048672136, + 0.07279598, + -0.027471887, + -0.02847654, + 0.114492, + 0.001777095, + -0.009519909, + 0.0025862327, + -0.056408145, + 0.023462169, + -0.006209674, + -0.010567065, + -0.05877587, + -0.032393616, + 0.011836781, + -0.038905054, + 0.05516299, + 0.09564333, + 0.028543225, + -0.023832332, + -0.0015711841, + 0.047049087, + 0.03128219, + 0.02811091, + 0.007177092, + 0.055283513, + 0.06574452, + -0.1020208, + 0.021213628, + 0.020237882, + -0.10449357, + 0.09608935, + -0.06253181, + 0.015293753, + 0.042053986, + 0.06105009, + 0.0909162, + 0.018404186, + 0.031023262, + 0.03562763, + 0.112073965, + 0.10124763, + -0.007683015, + 0.013140281, + -0.042280227, + 0.051135287, + -0.02950743, + 0.027794402, + -0.010734668, + -0.011067552, + 0.058104575, + -0.009284788, + 0.056184508, + -0.040822964, + 0.010282754, + 0.0374409, + 0.054198533, + -0.061418086, + 0.030569963, + 0.0023648597, + -0.054184474, + -0.020570045, + 0.012422129, + 0.025696559, + -0.007607385, + -0.026194826, + -0.024159024, + 0.0012979766, + -0.07461716, + 0.051458035, + -0.004183808, + -0.040804464, + -0.023975441, + 0.009455526, + -0.0018798193, + 0.03668693, + -0.019319497, + -0.06195781, + 0.06456675, + 0.040328216, + -0.010790134, + 0.013190221, + 0.09067539, + -0.0051480443, + 0.013312647, + -0.029548675, + 0.07769003, + 0.0027328292, + 0.04533781, + -0.0017606319, + -1.661594e-08, + -0.040610366, + -0.09883059, + -0.05522113, + -0.02916469, + -0.019305382, + 0.088138185, + -0.038325552, + -0.03327639, + -0.012629364, + 0.006948921, + 0.010438818, + 0.026771523, + -0.040855426, + -0.03958403, + -0.051137064, + -0.016159322, + -0.020525131, + -0.023726366, + -0.013322245, + -0.008097836, + 0.028000915, + 0.02806969, + 0.015645925, + -0.0043166955, + 0.0054488196, + 0.06720413, + 0.068473674, + 0.07172716, + -0.06339439, + -0.02540609, + 0.08468492, + 0.041936778, + 0.021067144, + -0.07596481, + 0.017143335, + 0.1260291, + 0.121315174, + 0.08431059, + 0.040587336, + 0.036687315, + -0.04717, + -0.022659328, + -0.006820436, + 0.005210712, + -0.033785924, + -0.08449115, + -0.0844501, + -0.03192747, + -0.036649443, + -0.13791409, + -0.036417518, + -0.00080547476, + -0.047578912, + 0.038795993, + -0.06757743, + 0.016941966, + 0.036312684, + 0.0125779435, + -0.058240637, + 0.004471269, + 0.03226526, + 0.09821741, + 0.053010236, + -0.016268 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "all-minilm:l6-v2", + "object": "list", + "usage": { + "prompt_tokens": 8, + "total_tokens": 8 + } + } + }, + "is_streaming": false + } +} diff --git 
a/tests/integration/common/recordings/d48ba62fab4e243d368ec42e5497b932ab697ffaa1cc79a7caf46b404677fb31.json b/tests/integration/common/recordings/d48ba62fab4e243d368ec42e5497b932ab697ffaa1cc79a7caf46b404677fb31.json new file mode 100644 index 000000000..3019aa169 --- /dev/null +++ b/tests/integration/common/recordings/d48ba62fab4e243d368ec42e5497b932ab697ffaa1cc79a7caf46b404677fb31.json @@ -0,0 +1,423 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "all-minilm:l6-v2", + "input": [ + "This is batch test file 1" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "all-minilm:l6-v2" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + -0.009745733, + 0.03363038, + -0.10852256, + 0.026609829, + -0.0060599064, + -0.020473678, + 0.0692486, + 0.032276765, + -0.11532835, + -0.0005207133, + 0.11814916, + 0.0119809555, + 0.03685765, + -0.10744223, + -0.046515625, + 0.0015449532, + -0.06319664, + -0.04640812, + -0.037318822, + -0.025718328, + -0.00026058854, + -0.011890766, + -0.050925612, + 0.014111713, + 0.029467698, + 0.006379121, + -0.012013293, + -0.0024293982, + -0.044318773, + -0.08100101, + 0.02009568, + 0.055713937, + 0.078816675, + 0.054973654, + 0.20367871, + -0.004309458, + 0.03877001, + 0.03825522, + -0.002538199, + -0.0007973801, + 0.044761047, + -0.054529082, + -0.008856888, + -0.04078078, + 0.011367262, + -0.022404457, + -0.06209053, + 0.02558725, + -0.0034454092, + -0.03743928, + -0.062026348, + -0.030812219, + -0.034592565, + -0.014926672, + 0.018588377, + 0.013435887, + 0.08169151, + 0.053658403, + -0.03557856, + 0.033325985, + -0.01637577, + -0.0222152, + -0.039247517, + 0.00094368146, + 0.10228945, + -0.04305617, + -0.052200828, + -0.02007385, + 0.054805383, + -0.08231377, + -0.14736547, + 0.048954617, + -0.0212168, + 0.02872658, + -0.0671409, + 0.021436114, + -0.023599947, + 0.03677982, + 0.010577411, + -0.0966004, + -0.06367233, + -0.10277648, + 0.0273993, + -0.06292906, + -0.046344172, + 0.039919835, + 0.02682899, + 0.025460077, + -0.013083559, + -0.002667712, + -0.016529463, + 0.012605053, + -0.0064383023, + 0.015841383, + -0.01710707, + 0.12320292, + -0.0077660284, + 0.05845043, + 0.07362552, + 0.038426086, + 0.004742023, + -0.0155985365, + 0.01418979, + 0.07865995, + -0.026352523, + -0.037174653, + 0.06787817, + -0.060126718, + 0.06111402, + -0.034931272, + -0.009446326, + -0.006150886, + 0.02892313, + -0.09361577, + 0.0335364, + -0.09088912, + 0.009241144, + 0.07092964, + -0.08954648, + 0.04494549, + 0.040462427, + -0.04167353, + 0.0076030386, + -0.0066417656, + -0.07275736, + -0.043690544, + 0.07685007, + -1.0508795e-33, + -0.019583685, + -0.13087204, + -0.03574564, + 0.070223756, + 0.08133056, + -0.009436003, + 0.046778366, + 0.03478148, + -0.09441185, + -0.040857755, + -0.02127058, + -0.106959894, + 0.024023255, + 0.022780996, + -0.09042505, + -0.035755932, + 0.011359196, + 0.050059184, + 0.0050815986, + -0.07676938, + 0.05453651, + 0.04191775, + -0.009206564, + -0.022437057, + -0.04617258, + -0.038608693, + -0.00036489012, + -0.025092375, + 0.039146807, + -0.0072839926, + 0.03675482, + -0.011301064, + -0.08863303, + 0.059421506, + 0.015851071, + 0.033407707, + 0.056883834, + -0.01203776, + 0.027333334, + -0.009560535, + -0.05030555, + -0.009787559, + 0.023205005, + -0.007937716, + 0.003991047, + -0.036422852, + -0.06979188, + 
0.046075627, + 0.056377746, + 0.0071927872, + -0.00020658698, + 0.017678235, + 0.023745935, + -0.0031295705, + 0.016370842, + 0.027585855, + -0.03440131, + -0.05594279, + 0.036442764, + 0.03577988, + -0.005324585, + 0.015240975, + -0.09071462, + 0.072764605, + 0.02343818, + -0.093097225, + 0.05842133, + -0.061913762, + 0.045556016, + 0.07639311, + -0.035199754, + -0.009256856, + 0.038682748, + -0.040795818, + 0.017686425, + -0.025513103, + 0.06860537, + 0.085520275, + -0.1023457, + -0.0036474275, + -0.014826131, + -0.05045756, + -0.09065474, + -0.076476775, + -0.008538021, + -0.04111943, + -0.035473913, + -0.061549038, + 0.114327826, + -0.09601482, + 0.022990143, + 0.0022396755, + -0.023026146, + -0.028128328, + 0.07969127, + -4.1765383e-34, + 0.07866384, + 0.11484068, + 0.016687382, + 0.009315677, + 0.01664128, + 0.024303248, + 0.046507504, + -0.043804675, + -0.09136995, + 0.106353745, + -0.06948852, + 0.018747667, + 0.0053492193, + -0.033229355, + 0.042339083, + -0.0017468681, + 0.05323157, + 0.0058223205, + -0.05331342, + 0.016506517, + -0.02325185, + 0.097519755, + -0.0045558517, + 0.08866843, + -0.028221445, + -0.012007969, + -0.009742725, + 0.061458003, + 0.01574456, + -0.00039456616, + 0.02444834, + 0.065891184, + -0.054779086, + 0.04863689, + 0.043890025, + -0.062467597, + 0.07615393, + 0.0067509366, + 0.019150084, + 0.06994535, + 0.027900916, + 0.08902746, + -0.027433047, + 0.031390887, + 0.02271287, + 0.08119532, + 0.06855678, + 0.0023552915, + -0.06764184, + 0.00704173, + -0.034521427, + -0.053785548, + -0.03075216, + 0.007947864, + -0.025317406, + -0.040664013, + 0.036144093, + 0.017730465, + -0.040179063, + 0.013665757, + 0.004815376, + 0.009095556, + 0.0072483593, + 0.012753351, + -0.047865536, + -0.046072423, + -0.014048283, + 0.031082962, + -0.034945205, + -0.023550391, + 0.033062257, + -0.022966444, + 0.007744228, + 0.015939556, + -0.0012224894, + 0.0010534802, + -0.015109, + -0.021597888, + -0.029862719, + 0.03983828, + 0.062536344, + 0.0106168175, + -0.027220478, + 0.02410377, + -0.0023566757, + 0.085310005, + 0.04843323, + 0.090823516, + 0.005126319, + 0.020297319, + -0.01739127, + 0.047677357, + 0.11080086, + 0.030030197, + 0.029773563, + -1.5454503e-08, + -0.03580758, + -0.12177604, + 0.019753791, + 0.05854353, + -0.01590761, + 0.085781366, + -0.09558486, + -0.0016744126, + 0.00773199, + -0.04790156, + 0.01175936, + 0.006536077, + -0.032027386, + 0.0031026274, + -0.07580574, + -0.039700802, + -0.00170645, + -0.070955865, + 0.043680355, + 0.029966798, + 0.0039943648, + 0.031923376, + 0.08119928, + 0.038820695, + 0.013302812, + 0.041675337, + 0.044349737, + 0.060403902, + -0.1058191, + -0.05287386, + 0.050275758, + 0.039101604, + 0.0599918, + -0.025067834, + -0.019554066, + 0.06748813, + 0.12508559, + 0.059007537, + -0.019899847, + -0.030194808, + -0.046559453, + 0.034567222, + -0.021644907, + -0.03327634, + -0.0075667608, + -0.100658834, + -0.0639619, + -0.055270903, + -0.0111757815, + -0.11671873, + -0.07208087, + 0.023208033, + 0.027215267, + 0.063635156, + -0.05858023, + 0.020345282, + 0.018325811, + -0.0036095325, + 0.006916675, + 0.06541716, + 0.009575581, + 0.046839867, + 0.0070611075, + -0.09470841 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "all-minilm:l6-v2", + "object": "list", + "usage": { + "prompt_tokens": 6, + "total_tokens": 6 + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/common/recordings/e297006956fc1fb184d0bbaa79f7beaa67a3824a6cd5d7a0e21c8e587ea03980.json 
b/tests/integration/common/recordings/e297006956fc1fb184d0bbaa79f7beaa67a3824a6cd5d7a0e21c8e587ea03980.json new file mode 100644 index 000000000..c260afd8c --- /dev/null +++ b/tests/integration/common/recordings/e297006956fc1fb184d0bbaa79f7beaa67a3824a6cd5d7a0e21c8e587ea03980.json @@ -0,0 +1,807 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "nomic-embed-text:latest", + "input": [ + "This is the content of test file 1" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "nomic-embed-text:latest" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + 0.011639302, + 0.015066345, + -0.1572681, + -0.044965014, + 0.045302268, + 0.012036585, + 0.036542512, + 0.005725059, + -0.052068613, + -0.023581833, + -0.0041714185, + 0.047297083, + 0.0044818125, + 0.0073796143, + -0.06833552, + -0.020871542, + 0.030256433, + -0.052156504, + -0.016426736, + -0.045092978, + -0.012395779, + -0.017792987, + -0.08013035, + -0.025271736, + 0.110538565, + 0.019197728, + -0.05617586, + 0.045342237, + -0.100888394, + -0.015532925, + 0.04541118, + -0.048470836, + 0.014533936, + -0.04054472, + 0.004343861, + -0.03328387, + 0.038874496, + 0.034725353, + 0.022646122, + 0.024648184, + -0.02911675, + -0.0140342, + -0.010215055, + -0.00092316914, + 0.015458233, + 0.0042022346, + -0.0118241655, + 0.031950384, + 0.021818206, + -0.0009401939, + 0.0028767833, + 0.022676043, + -0.027578428, + 0.017072845, + 0.055942602, + 0.008372957, + -8.234923e-05, + -0.0076934453, + 0.017103186, + -0.07049976, + 0.0470288, + 0.030520689, + -0.0853248, + 0.031003723, + 0.005461848, + -0.039933346, + -0.028195756, + 0.02583814, + -0.020038705, + 0.027421296, + 0.09211795, + -0.0021492639, + 0.009114191, + -0.02858135, + -0.0130490055, + -0.047928475, + 0.021908045, + -0.03816779, + -0.040175024, + -0.008988226, + 0.062123742, + 0.032331105, + 0.013500545, + 0.014699184, + 0.06949165, + -0.029347114, + -0.036963575, + -0.025804758, + 0.006973289, + 0.03219541, + 0.014725156, + 0.032485504, + 0.025228832, + 0.015978005, + -0.0036463195, + 0.009395636, + -0.0030804265, + 0.026493264, + -0.01026861, + 0.0003747859, + 0.017421532, + 0.015864568, + 0.0254427, + -0.021840125, + 0.006622214, + 0.018735437, + -0.008662971, + 8.567802e-05, + -0.08026379, + -0.003987451, + -0.0022324976, + 0.030920457, + -0.03272228, + -0.025135856, + 0.015818166, + -0.030539474, + 0.084593095, + -0.0072662015, + -0.04306349, + 0.016708883, + -0.02148629, + -0.021512741, + 0.011571002, + 0.07055689, + 0.016873637, + 0.017103009, + -0.058425475, + 0.009254332, + 0.018121762, + -0.029209172, + -0.017481297, + 0.005129311, + -1.4240719e-05, + -0.02815651, + -0.012156167, + -0.011126637, + 0.012530035, + 0.017916953, + -0.049299978, + -0.023406321, + -0.025908781, + 0.01868743, + 0.013128073, + 0.030577261, + 0.03492911, + -0.026720364, + 0.044888426, + 0.015100583, + -0.02517811, + -0.018026695, + 0.04455666, + 0.006026339, + 0.006132853, + 0.013067113, + 0.013630368, + -0.06992026, + 0.015714098, + -0.025995858, + 0.00764349, + 0.06502328, + 0.00921131, + 0.00039559926, + -0.014907944, + 0.033250615, + -0.022297438, + -0.022631606, + -0.009259513, + 0.07459313, + 0.07961807, + -0.04546293, + -0.04984229, + -0.056986727, + -0.021624641, + -0.009604434, + -0.050308105, + -0.029882062, + 0.02890167, + -0.016925206, + 0.03357617, + 
-0.013084858, + 0.032628123, + 0.004407517, + 0.028667213, + 0.02581998, + -0.008354794, + -0.045190092, + 0.017996402, + -0.021489577, + -0.049504388, + 0.003702337, + -0.023653682, + -0.007418799, + -0.09230719, + -0.0666417, + 0.01368294, + -0.07626095, + 0.016283033, + -0.025274273, + 0.046640623, + -0.03306251, + -0.019547738, + -0.02728644, + -0.038707435, + -0.0075380616, + -0.03706684, + -0.014038333, + -0.019394161, + -0.010599687, + -0.0057279305, + 0.01753179, + 0.037147418, + -0.01409748, + -0.028078519, + 0.028943742, + 0.044126343, + -0.024797611, + -0.02061766, + -0.041145287, + -0.0016994113, + -0.035794605, + 0.022767134, + 0.007715133, + 0.033083446, + -0.06898011, + 0.0077070463, + 0.0039297407, + -0.038172692, + -0.032068398, + -0.043679804, + -0.0292851, + -0.020715753, + -0.05462352, + 0.011206036, + 0.020920858, + -0.007133438, + -0.006820509, + -0.016809242, + 0.06488191, + -0.0150427865, + 0.040075593, + -0.047243405, + 0.05071197, + 0.015879754, + -0.0006090825, + 0.0067252424, + 0.0052318904, + 0.0038148144, + -0.032034587, + 0.032176103, + 0.040441014, + -0.03223476, + 0.0034279015, + 0.04811163, + 0.058563426, + 0.025335358, + -0.03077014, + -0.0060142917, + -0.025248509, + 0.024592392, + -0.03674746, + 0.024663158, + -0.060253005, + 0.009173809, + -0.004111937, + -0.063402615, + -0.01951628, + -0.039490156, + 0.018258424, + -0.043015976, + -0.015164487, + -0.017073318, + 0.027809769, + -0.021215433, + 0.007797112, + 0.008731678, + -0.036673818, + 0.012786695, + 0.028968208, + -0.030241087, + 0.020865943, + -0.026749771, + 0.033981804, + 0.010454427, + 0.023153242, + 0.020885227, + -0.0056243115, + 0.0117305005, + -0.015051302, + 0.013582618, + 0.03807434, + 0.010856497, + 0.020801183, + 0.011158894, + 0.036391996, + 0.019670399, + 0.007724792, + 0.06660602, + -0.011434749, + 0.0057949307, + -0.015963648, + -0.019779123, + 0.005820883, + 0.02833991, + 0.055220414, + 0.010273399, + -0.016092837, + 0.03503124, + -0.034432467, + 0.023686841, + 0.022379564, + -0.07128316, + 0.012263694, + -0.015228141, + 0.0032988787, + -0.029930541, + 0.041881878, + 0.03506383, + 0.020228907, + 0.0438159, + -0.038998622, + 0.0033828963, + -0.082220346, + -0.021915225, + -0.00014996591, + 0.02804432, + 0.020062406, + 0.012756022, + 0.034497134, + -0.02747778, + -0.047376838, + 0.064383976, + 0.070425786, + -0.05746651, + -0.028404344, + 0.026372714, + 0.03306257, + 0.0073155067, + 0.051485326, + 0.0068675145, + 0.040136788, + 0.045383066, + 0.034149066, + 0.02086147, + 0.0009087964, + 0.037278313, + -0.081617154, + -0.032882202, + 0.02157909, + 0.021868218, + 0.07965252, + -0.0027324036, + -0.0022803254, + 0.014258049, + -0.0020600832, + 0.00047349077, + 0.04002713, + 0.04263055, + -0.009511693, + 0.06796055, + -0.02155429, + 0.043834608, + -0.029989557, + 0.009623121, + -0.026068889, + 0.021337777, + 0.011070724, + -0.020380916, + -0.0023191955, + 0.046481982, + 0.039304417, + -0.0045394786, + 0.003737432, + 0.034863517, + 0.053514365, + 0.035962798, + 0.04095995, + -0.080873586, + 0.0112584885, + -0.0145209655, + 0.023800805, + 0.04855744, + 0.0037306463, + 0.03949077, + 0.042007603, + 0.00916003, + -0.012223143, + 0.022103913, + -0.017077385, + 0.035043065, + 0.0052557834, + -0.039841656, + 0.0020140728, + -0.057917137, + 0.03641347, + 0.017727314, + -0.030229636, + 0.026509946, + 0.010324972, + -0.028184937, + 0.017539727, + -0.021746434, + 0.0031611102, + -0.008564719, + 0.026577024, + -0.0073260553, + 0.012139988, + -0.039608642, + -0.062452354, + 0.03773313, + 0.002820345, + 
0.017331626, + -0.008981819, + -0.02020533, + -0.057272766, + -0.014693149, + 0.033687364, + 0.038407385, + -0.020838683, + 0.038617346, + -0.03282725, + 0.0065172473, + 0.031010486, + -0.0017651296, + -0.02163586, + -0.008899588, + -0.026506478, + 0.03540833, + 0.07076032, + -0.016357146, + -0.08069671, + -0.042310607, + -0.012363274, + 0.03790111, + 0.007565661, + -0.037524316, + 0.07095513, + 0.010869782, + 0.0032129285, + -0.033399966, + 0.038155936, + 0.034415327, + -0.052643284, + -0.05567196, + -0.03225739, + 0.008719539, + 0.14483878, + 0.071855366, + -0.026637336, + -0.04281552, + -0.02133026, + 0.020932574, + 0.023442162, + 0.0018492922, + 0.03244938, + 0.08237317, + -0.03321164, + 0.051374298, + -0.018296566, + -0.009659297, + 0.031976808, + -0.010097727, + 0.010057915, + 0.051651575, + 0.0199425, + 0.019540219, + -0.020617861, + 0.03563907, + -0.036343392, + 0.032987807, + 0.06027452, + -0.017668264, + -0.044425867, + 0.015104213, + -0.07373515, + 0.01810383, + 0.031706426, + -0.046879865, + 0.0036537861, + -0.031956047, + 0.03578955, + 0.027828328, + 0.021754785, + -0.062319316, + -0.035861533, + 0.023409521, + -0.011718964, + 0.012511818, + 0.019975103, + 0.03046746, + 0.019306395, + 0.008897869, + 0.022976985, + 0.08666871, + 0.034413245, + 0.007698169, + -0.013328631, + 0.026807705, + -0.039164156, + 0.0001842902, + 0.008939378, + 0.053093646, + 0.0054843645, + -0.0048546907, + 0.006646481, + -0.036913976, + -0.02434218, + -0.007819763, + -0.034326635, + -0.09425071, + -0.035864092, + -0.027039077, + 0.0018631782, + -0.011367168, + 0.03460308, + 0.06908907, + 0.0006993122, + 0.029187243, + 0.013981396, + -0.034905925, + 0.009661519, + 0.016402403, + 0.013219478, + 0.025419146, + -0.013838593, + -0.09521828, + 0.04690183, + 0.008306249, + -0.04494361, + 0.07675296, + 0.08630913, + 0.0027291386, + 0.047438163, + -0.03291628, + -0.017013406, + 0.008466675, + 0.0068329596, + -0.047961134, + -0.0060370415, + 0.017779041, + 0.05304337, + -0.07138653, + -0.013791788, + 0.01667366, + -0.026808698, + 0.012813507, + -0.029537767, + -0.07048566, + 0.026801381, + -0.021863695, + -0.08986038, + 0.04256004, + -0.042580713, + -0.050321113, + -0.02441381, + 0.024967946, + -0.03307329, + 0.023765154, + -0.042465124, + -0.022590572, + -0.050977908, + 0.02002681, + -0.01659008, + -0.0016500946, + 0.007923218, + 0.023085529, + -0.028293792, + -0.0070867077, + -0.002519201, + 0.014844528, + 0.012927241, + 0.013701682, + -0.048480112, + 0.017051037, + -0.048594326, + -0.03374255, + 0.015788445, + 0.01736624, + 0.02363127, + -0.043622795, + -0.04752542, + 0.05619384, + -0.009064419, + 0.013587886, + 0.031963795, + 0.0055674682, + 0.00821165, + -0.007879534, + -0.025519967, + 0.030929072, + -0.03054716, + -0.028717758, + -0.01304714, + 0.025171572, + -0.004879199, + -0.001190343, + -0.010213315, + 0.01971419, + -0.032143768, + -0.008055433, + -0.045028396, + 0.0050284, + 0.008977255, + 0.007132238, + -0.052949388, + 0.011562612, + -0.0043699676, + 0.06377099, + -0.010715953, + -0.027962748, + 0.0025381946, + 0.065418504, + 0.015951851, + -0.10228855, + -0.0038436814, + -0.015558708, + -0.035604823, + 0.039515387, + -0.011977611, + 0.008272532, + -0.047362626, + 0.029810345, + -0.026100902, + 0.080183394, + -0.029716058, + -0.008065036, + -0.0019149253, + -0.029152166, + 0.030865246, + 0.028290713, + 0.059991617, + -0.0539013, + 0.037941493, + -0.046701435, + -0.056897625, + -0.050652288, + 0.0022519496, + -0.044697277, + 0.018704673, + 0.024128519, + 0.06013336, + 0.057803143, + -0.011098817, + 
0.004350433, + -0.046533823, + 0.011547173, + 0.039410993, + 0.010503389, + 0.058373533, + 0.04097013, + -0.04243095, + 0.09540366, + 0.07546867, + 0.057422172, + -0.0150666535, + -0.00072658417, + -0.0055776117, + 0.03369649, + -0.07023698, + -0.041115183, + -0.06924242, + 0.0061645363, + -0.00047588223, + -0.03563763, + 0.011595489, + -0.0034681638, + 0.02738642, + 0.026109103, + -0.018220779, + 0.026244855, + -0.067560904, + 0.026338186, + 0.016787479, + 0.065061815, + -0.0032663948, + -0.040305886, + 0.017459001, + 0.036517326, + 0.055479337, + 0.00085552345, + -0.0372879, + -0.06509678, + -0.038734257, + 0.052903496, + 0.033298932, + 0.039541215, + -0.09552075, + -0.0096350545, + -0.08214571, + -0.024635889, + 0.012038027, + 0.00089192577, + -0.03183621, + -0.011991485, + -0.03902091, + -0.0127780195, + -0.01724641, + 0.051544886, + -0.0018517342, + -0.023545155, + 0.046582974, + 0.00838307, + 0.030676562, + 0.00019708494, + 0.045098882, + -0.031479437, + -0.013706887, + 0.021959703, + 0.0020392945, + -0.06168245, + -0.03649696, + 0.035295885, + 0.02590806, + -0.010051864, + 0.06865142, + -0.017345844, + 0.01564999, + -0.00623685, + 0.010844825, + -0.013015856, + 0.022496467, + 0.07649363, + 0.036356304, + -0.040345356, + 0.00293154, + -0.01804687, + -0.03515604, + 0.022299029, + -0.03676945, + 0.07276787, + -0.04430659, + -0.03392204, + -0.030020125, + -0.022968723, + 0.029162299, + -0.0033855392, + 0.021752143, + 0.017534897, + -0.023780832, + 0.027371254, + 0.017058812, + -0.0004049258, + 0.03990323, + -0.008081489, + -0.013143231, + -0.06439464, + 0.018572995, + -0.046607014, + 0.027462576, + 0.014255841, + -0.02674485, + 0.023134982, + -0.070987545, + 0.00939401, + 0.023703443, + -0.009809178, + 0.022829901, + -0.040908735, + 0.0064307996, + 0.11391804, + -0.051118158, + 0.020216303, + -0.02172353, + 0.04750726, + 0.018758802, + -0.0051700706, + -0.02455834, + 0.005184222, + -0.036763046 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "nomic-embed-text:latest", + "object": "list", + "usage": { + "prompt_tokens": 8, + "total_tokens": 8 + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/common/recordings/f4b0cf7f241feb7ff68414545a42d5759b33d997f7b1305fc20ae7f7c50faa26.json b/tests/integration/common/recordings/f4b0cf7f241feb7ff68414545a42d5759b33d997f7b1305fc20ae7f7c50faa26.json new file mode 100644 index 000000000..7b34088fe --- /dev/null +++ b/tests/integration/common/recordings/f4b0cf7f241feb7ff68414545a42d5759b33d997f7b1305fc20ae7f7c50faa26.json @@ -0,0 +1,423 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "all-minilm:l6-v2", + "input": [ + "This is the content of test file 2" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "all-minilm:l6-v2" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + -0.014871168, + 0.094365, + -0.098275684, + 0.016189486, + 0.072296426, + -0.039229725, + 0.007638039, + 0.035811495, + -0.03784589, + 0.022591105, + 0.15810202, + 0.009195058, + -0.029846655, + -0.06448414, + -0.01898075, + -0.02023675, + -0.07593923, + -0.04666322, + 0.010769107, + 0.033283222, + 0.06951838, + 0.039086174, + -0.009640043, + -0.008601025, + 0.039979056, + 0.02799972, + -0.06578151, + 0.08029443, + 0.0101568075, + -0.07898879, + 0.048795786, + 0.057297125, + 0.025737243, + 0.03572965, + 
0.11485981, + 0.030900626, + 0.118485495, + -0.041167885, + -0.019413618, + -0.0009897926, + 0.03717747, + -0.012367201, + -0.0026639055, + 0.015703445, + -0.0046827365, + 0.023138778, + 0.012855939, + -0.029367425, + 0.00042996072, + -0.003222942, + -0.055509202, + 0.012830617, + -0.06941755, + -0.011024706, + 0.07149942, + 0.021040803, + 0.0409756, + 0.010087916, + -0.015326204, + 0.06633094, + 0.024846299, + 0.030543685, + -0.036063526, + 0.04786587, + 0.08074621, + -0.051489003, + -0.03944393, + -0.025607359, + -0.030061793, + -0.119378455, + -0.14597124, + -0.0019379344, + 0.008393092, + 0.023913048, + 0.028285578, + 0.017838098, + -0.10575887, + 0.008080291, + 0.06388723, + -0.12506105, + -0.02536782, + -0.11007926, + 0.051198784, + 0.007446184, + -0.030837545, + 0.09254253, + 0.05638562, + -0.0155668175, + -0.031867314, + 0.018337138, + 0.02442871, + -0.042078987, + 0.0038125275, + 0.089955, + -0.008119613, + 0.040103614, + 0.011012824, + 0.044628628, + 0.0791957, + 0.054247666, + -0.027651828, + -0.03190785, + 0.041443683, + 0.041629724, + -0.077835254, + -0.09937542, + 0.029904107, + -0.05434366, + 0.07058962, + -0.04535761, + 0.03365359, + -0.061656676, + -0.018105442, + -0.07228336, + 0.035377987, + -0.03161877, + -0.020589713, + 0.058485094, + -0.049225487, + 0.03934316, + 0.08550028, + -0.029991213, + -0.05576064, + -0.029334918, + -0.053031918, + -0.061839186, + 0.08176057, + -3.3282106e-33, + 0.00018265574, + -0.09808404, + -0.00554673, + 0.13180184, + 0.026467713, + -0.03976283, + 0.010410568, + 0.022475285, + -0.07190717, + 0.005138454, + -0.021325583, + -0.1046733, + 0.0020021838, + 0.023773609, + -0.057499945, + -0.011727483, + -0.020912478, + 0.026353713, + 0.01779019, + -0.0148312645, + 0.064687304, + 0.045060385, + -0.029312065, + -0.08633001, + -0.026792597, + 0.014552106, + 0.004505434, + -0.06774755, + 0.034052122, + 0.013713737, + -0.0075813113, + -0.059718475, + -0.016189422, + 0.044314116, + 0.026844766, + 0.026430624, + 0.024091395, + -0.0032406747, + -0.075288124, + 0.032822173, + 0.027104331, + -0.026295068, + 0.04316082, + -0.010091815, + 0.034184698, + -0.08266358, + -0.020962045, + -0.00719584, + 0.068549044, + 0.005033586, + 0.0017975906, + 0.06465498, + 0.05990613, + -0.012483792, + 0.024451919, + 0.021659598, + -0.0046074707, + -0.004559902, + 0.002713282, + 0.062373567, + 0.0035651235, + 0.06017224, + -0.062707886, + 0.039937016, + -0.0064443815, + -0.041358124, + -0.045459975, + -0.1090475, + 0.08058783, + 0.055110224, + -0.05126053, + -0.05976516, + 0.037940193, + 0.015456569, + -0.024956519, + -0.037877902, + -0.006799, + 0.031685203, + -0.036858797, + -0.055584695, + -0.048513155, + -0.07101657, + -0.041681714, + -0.04429727, + -0.09584418, + -0.060873836, + 0.008867621, + -0.106438614, + 0.040050562, + -0.084729105, + 0.018111277, + 0.010153493, + -0.08883196, + -0.063969284, + 0.08611972, + 1.4074298e-33, + 0.03433739, + 0.037653737, + -0.05348675, + 0.0015385789, + 0.026684077, + 0.026603375, + 0.07006387, + -0.034265522, + -0.018221779, + 0.10960259, + 0.013464475, + -0.008325532, + 0.019438146, + -0.039553005, + 0.03469477, + -0.0123773115, + -0.013288484, + -0.048081715, + -0.019539693, + -0.0033996427, + -0.024453517, + 0.061505664, + 0.119236834, + 0.026294904, + -0.01607055, + -0.011499089, + 0.04267117, + 0.0295908, + 0.022084564, + 0.007893738, + 0.052055445, + 0.05781507, + -0.13408813, + 0.01778491, + 0.021400984, + -0.12113228, + 0.10535695, + -0.07358604, + -0.013651957, + 0.04049295, + 0.054150987, + 0.0987462, + 0.0110208625, + 
0.040327504, + 0.034936633, + 0.10400846, + 0.12958324, + -0.024531014, + 0.002284699, + -0.044239815, + 0.049778443, + -0.055788964, + 0.015235888, + 0.0034493478, + -0.02607555, + 0.060282644, + -0.028004775, + 0.040875163, + -0.023749253, + 0.002289086, + 0.04982698, + 0.046928305, + -0.064160004, + 0.013701618, + 0.015511878, + -0.054725982, + -0.0459802, + 0.03258067, + 0.027034523, + 0.01643672, + -0.041782584, + -0.03698569, + -0.023043923, + -0.07073365, + 0.028486207, + 0.0017764921, + -0.03352676, + -0.009977863, + 0.024488676, + -0.01789395, + 0.029737154, + -0.026266927, + -0.03567072, + 0.07469971, + 0.028393274, + -0.029625034, + -0.01053128, + 0.09147493, + -0.018718474, + 0.0012933073, + -0.021214467, + 0.07475739, + -0.007773536, + 0.048597455, + 0.005216022, + -1.6914717e-08, + -0.05724563, + -0.0938908, + -0.034359876, + -0.037500683, + -0.020235153, + 0.06142227, + -0.042273093, + -0.008759724, + -0.009908796, + 0.016232042, + -0.014239323, + 0.024709346, + -0.030538557, + -0.05391127, + -0.051778477, + 0.01277344, + 0.0036140021, + -0.012569925, + -0.025041323, + -0.0203936, + 0.025865255, + 0.010908398, + 0.027834684, + 0.009661084, + -0.006598172, + 0.07860872, + 0.054516125, + 0.042956624, + -0.06275145, + -0.025701547, + 0.08085865, + 0.030041302, + 0.02248997, + -0.0840195, + 0.00029938898, + 0.10966559, + 0.118907265, + 0.063014604, + 0.037847042, + 0.032069027, + -0.05345487, + -0.022730324, + 0.0071888734, + 0.037573762, + -0.020178014, + -0.090167634, + -0.07191704, + -0.02604166, + -0.043885063, + -0.14087014, + -0.017230472, + -0.012063355, + -0.046736836, + 0.039048597, + -0.060394738, + 0.022166032, + 0.025670663, + 0.022949725, + -0.06707243, + -0.014654702, + 0.057985142, + 0.10511708, + 0.05698323, + -0.017205814 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "all-minilm:l6-v2", + "object": "list", + "usage": { + "prompt_tokens": 8, + "total_tokens": 8 + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/common/recordings/f7a80fae588892aa7031ac972c12030f2bd2ee609d672e9f44275c601800b144.json b/tests/integration/common/recordings/f7a80fae588892aa7031ac972c12030f2bd2ee609d672e9f44275c601800b144.json new file mode 100644 index 000000000..1c1706c94 --- /dev/null +++ b/tests/integration/common/recordings/f7a80fae588892aa7031ac972c12030f2bd2ee609d672e9f44275c601800b144.json @@ -0,0 +1,423 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "all-minilm:l6-v2", + "input": [ + "This is batch test file 0" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "all-minilm:l6-v2" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + 0.020637129, + 0.048547756, + -0.12516363, + 0.01991118, + -0.006535745, + -0.017178575, + 0.027727997, + 0.032170568, + -0.07302972, + 0.008939002, + 0.11493648, + 0.0058907545, + 0.0058539375, + -0.077171296, + -0.06883132, + 0.0039748913, + -0.046849657, + -0.072902456, + -0.010890429, + -0.0019311906, + -0.011614798, + 0.003689495, + -0.03695609, + -0.009029024, + 0.017461002, + -0.004713484, + -0.010254731, + -0.026636763, + -0.026125714, + -0.046913657, + 0.017024228, + 0.0713477, + 0.07881179, + 0.03789051, + 0.21716279, + -0.0077837943, + 0.04686894, + 0.020414647, + 7.314368e-05, + 0.0103133675, + 0.059848394, + -0.04321678, + -0.011937493, + -0.021149047, + 
0.021315353, + -0.00072822213, + -0.046116166, + -0.0046820445, + 0.016943695, + -0.03249135, + -0.055184096, + 4.1543382e-05, + -0.034172166, + -0.023247559, + 0.020267941, + 0.012827845, + 0.065036125, + 0.07180022, + -0.013490698, + 0.06376413, + -0.017730094, + -0.01806601, + -0.034191083, + 0.008955718, + 0.098446764, + -0.0061265854, + -0.06815829, + -0.039525956, + 0.060588058, + -0.094874755, + -0.11774928, + 0.019538416, + -0.014697532, + 0.04773719, + -0.061298393, + 0.030337377, + -0.0022184649, + 0.019007793, + 0.024370821, + -0.07063359, + -0.07582954, + -0.10816809, + 0.031845964, + -0.057830192, + -0.04169559, + 0.0752806, + 0.019289386, + 0.028845867, + 0.0077010663, + 0.013930818, + -0.067987345, + 0.012679873, + -0.07907268, + 0.0143718915, + -0.021433424, + 0.11880779, + -0.016258432, + 0.07099568, + 0.035778854, + 0.028776454, + 0.013304291, + -0.05192297, + 0.026758345, + 0.10282426, + -0.003306269, + -0.03239622, + 0.083044045, + -0.0412691, + 0.043435257, + -0.043423533, + -0.013239603, + -0.0029038454, + 0.038365215, + -0.10401672, + 0.012744224, + -0.122984126, + -0.008942817, + 0.06162198, + -0.120285526, + 0.043005254, + 0.04814879, + -0.036352232, + -0.003885529, + -0.018503373, + -0.088186465, + -0.0031517749, + 0.09290919, + -1.1695094e-33, + -0.015589721, + -0.13189551, + 0.008088751, + 0.06899503, + 0.07353927, + -0.030646399, + 0.05110342, + 0.03081624, + -0.07850498, + -0.021147482, + 0.00017823944, + -0.10502706, + 0.030078856, + 0.02572523, + -0.068158925, + -0.025015576, + 0.021830637, + 0.049748335, + 0.01520941, + -0.080153145, + 0.06796621, + 0.021865685, + -0.034017574, + -0.030821111, + -0.048006665, + 0.0005615041, + -0.0137883695, + -0.04500587, + 0.015368256, + -0.0043663937, + 0.037706476, + 0.0049090013, + -0.06216566, + 0.03060772, + 0.030548712, + 0.029262561, + 0.020701125, + 0.0056516766, + 0.010610447, + 0.019530762, + -0.05664136, + -0.022654066, + -0.0010107337, + -0.020805702, + -0.012242364, + -0.05591731, + -0.049421698, + 0.024721064, + 0.05803342, + 0.010474127, + -0.008790625, + 0.025362873, + 0.020258408, + 0.004368581, + -0.01018003, + 0.012385932, + -0.037656736, + -0.05642639, + 0.020923307, + 0.022813153, + -0.005735433, + 0.015326356, + -0.108707875, + 0.048076265, + 0.023256551, + -0.10311626, + 0.061980195, + -0.07340407, + 0.051583096, + 0.07360003, + -0.029443117, + -0.014564469, + 0.042043358, + -0.020252181, + 0.0147808045, + -0.0285806, + 0.07891856, + 0.056849223, + -0.106308356, + 0.0197874, + 0.0269322, + -0.04749746, + -0.066681586, + -0.10474516, + 0.012599429, + -0.056163482, + -0.04901015, + -0.04571026, + 0.09704481, + -0.105899766, + 0.044303197, + -0.020125533, + -0.0368709, + -0.015417924, + 0.042297333, + -8.289866e-35, + 0.07415767, + 0.10998298, + -0.016995763, + 0.01066263, + -0.0012327223, + 0.028000232, + 0.0714317, + -0.02320065, + -0.07778205, + 0.11864239, + -0.016559754, + 0.037961867, + 0.02930022, + -0.008237686, + 0.059777655, + 0.008086454, + 0.02075205, + 0.025284613, + -0.055471037, + 0.0073576584, + -0.013398135, + 0.11896543, + -0.014611002, + 0.07691816, + -0.019711656, + -0.01920917, + -0.004744884, + 0.08173054, + 0.019665759, + -0.013193461, + 0.06215852, + 0.07420406, + -0.073212065, + 0.036052067, + 0.07328616, + -0.057373393, + 0.08346425, + 0.018834447, + 0.03309735, + 0.041197047, + 0.033917964, + 0.09151449, + -0.051731598, + 0.049615093, + 0.01124018, + 0.06661862, + 0.07268375, + -0.013245848, + -0.039673895, + -0.012173254, + 0.0017787582, + -0.05746287, + -0.013884767, + 
0.020205025, + -0.029692367, + -0.031010685, + 0.0149556715, + 0.026381323, + -0.025382591, + 0.0074336748, + -0.00949915, + 0.015655186, + -0.0012397208, + -0.0032508406, + -0.046632554, + -0.0030316226, + -0.007273208, + 0.064231135, + -0.034431897, + -0.06433184, + 0.045421343, + -0.010773523, + -0.017881984, + 0.010312532, + -0.024369273, + -0.008478495, + -0.02457377, + -0.0263535, + -0.027263613, + 0.047060315, + 0.08128726, + 0.0045517692, + -0.010821656, + 0.026526682, + 0.018961033, + 0.059243083, + 0.001561823, + 0.09838158, + 0.00822081, + 0.008796511, + -0.0060577285, + 0.028892087, + 0.08253284, + 0.049560018, + 0.023363132, + -1.498271e-08, + -0.036891207, + -0.10629833, + 0.030452948, + 0.049268734, + -0.0030453752, + 0.07413954, + -0.07043819, + -0.034285706, + -0.009679971, + -0.046219327, + 0.013510038, + -0.018686565, + -0.048570327, + 0.0028313443, + -0.06190722, + -0.053201936, + 0.0060967463, + -0.043467365, + 0.042226154, + 0.03455835, + -0.0375257, + 0.023590367, + 0.054896712, + 0.029878648, + 0.019286606, + 0.026097741, + 0.06938145, + 0.06272366, + -0.09566521, + -0.07481147, + 0.025204772, + 0.039396077, + 0.036375154, + -0.01104443, + -0.028223084, + 0.111878626, + 0.13400707, + 0.06680113, + -0.011737675, + -0.03585406, + -0.07978788, + 0.032793757, + -0.0021075818, + -0.028365146, + -0.042218164, + -0.08132239, + -0.0753423, + -0.043771427, + -0.015633285, + -0.14193884, + -0.055949364, + 0.025526602, + -0.023186589, + 0.061106257, + -0.056208834, + 0.00838827, + 0.014720396, + -0.014650135, + -0.012830787, + 0.08434067, + 0.024660436, + 0.05366935, + 0.005782819, + -0.10599063 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "all-minilm:l6-v2", + "object": "list", + "usage": { + "prompt_tokens": 6, + "total_tokens": 6 + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/common/recordings/ff568685962ecba61ca6c2811cb2576f78baaac506fc2c69bb14079201783605.json b/tests/integration/common/recordings/ff568685962ecba61ca6c2811cb2576f78baaac506fc2c69bb14079201783605.json new file mode 100644 index 000000000..554106d55 --- /dev/null +++ b/tests/integration/common/recordings/ff568685962ecba61ca6c2811cb2576f78baaac506fc2c69bb14079201783605.json @@ -0,0 +1,807 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "nomic-embed-text:latest", + "input": [ + "This is batch test file 0" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "nomic-embed-text:latest" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + 0.04614301, + -0.020081447, + -0.13696706, + 0.014573554, + 0.0701535, + -0.023059264, + 0.0017123591, + 0.046829354, + -0.04367561, + -0.114361376, + -0.035698596, + 0.06475817, + 0.05364872, + 0.038444433, + -0.01979135, + -0.037200496, + 0.036657624, + -0.06543346, + 0.019384816, + 0.014107363, + 0.08575862, + 0.005253997, + -0.068923116, + 0.010090121, + 0.12664902, + 0.007504762, + -0.01953818, + 0.050352264, + -0.054028552, + -0.032810874, + 0.06410688, + 0.02482149, + 0.013947014, + -0.018964177, + -0.014869268, + -0.014962293, + 0.015406188, + 0.0019375562, + 0.045115244, + 0.01663003, + 0.011144363, + -0.0072048977, + 0.03155388, + -0.044834215, + -0.0060860706, + 0.04020486, + 0.018086052, + 0.04788624, + -0.00983748, + 0.013905991, + -0.015586391, + -0.04333209, + -0.025767654, + 
0.0115056895, + 0.023292946, + 7.8543904e-05, + -0.025338432, + 0.027270807, + -0.033308506, + -0.0034880606, + 0.027414253, + 0.023092583, + -0.046051297, + 0.05614708, + 0.0013318001, + -0.009060849, + -0.025585877, + 0.01975632, + 0.005582998, + 0.029287277, + 0.04566754, + -0.025097856, + -0.031588476, + 0.010089205, + -0.032345522, + -0.05282097, + -0.027767532, + -0.064588815, + -0.048720118, + -0.019109437, + 0.018205147, + 0.023525203, + 0.030500842, + 0.062187836, + 0.056703616, + -0.012734468, + -0.0023994881, + -0.01470007, + -0.014610555, + 0.041148573, + 0.02209264, + 0.016696744, + 0.01664216, + -0.042584907, + -0.030513879, + 0.009484068, + 0.038292237, + 0.049507294, + -0.008864681, + -0.026641846, + -0.00806868, + 0.015242631, + 0.03148721, + -0.029101137, + -0.001128117, + 0.047483873, + -0.021579307, + -0.0061297114, + -0.051103026, + -0.01100252, + -0.007417349, + 0.033126004, + -0.03208908, + -0.014004128, + 0.0055860616, + -0.004471519, + 0.040237978, + -0.011666332, + -0.03375841, + 0.010431967, + 0.015548171, + -0.003738259, + 0.006507693, + 0.044239193, + 0.022051405, + 0.0369485, + -0.08473572, + 0.050257385, + 0.021561263, + -0.038263254, + -0.0028757958, + 0.004459847, + -0.0499833, + -0.05051039, + 0.038672045, + 0.027102912, + -0.038302545, + -0.04273586, + -0.008564898, + 0.010148107, + -0.033453222, + 0.025933161, + 0.034907244, + 0.05704188, + 0.047914036, + -0.033055704, + 0.037968747, + 0.021832222, + -0.021085719, + 0.020705225, + 0.0013380332, + 0.0033825892, + -0.004659198, + 0.03569596, + 0.035501115, + -0.07247981, + 0.024580602, + -0.016031673, + 0.0043628234, + 0.044260535, + 0.022414433, + -0.024638122, + -0.027389847, + -0.015699405, + -0.0736989, + -0.07402259, + 0.021693923, + 0.08675446, + 0.07019457, + -0.010507776, + -0.053982176, + -0.050338153, + 0.022691121, + -0.009254433, + -0.08471297, + -0.02192142, + 0.01370606, + -0.007591457, + 0.04464477, + -0.041420456, + 0.014799598, + -0.017481469, + 0.025636235, + 0.021010395, + -0.007866782, + -0.044533994, + 0.02992151, + -0.01817989, + -0.046332225, + -0.038017664, + -0.010766631, + -0.053923517, + -0.06885444, + -0.083982274, + 0.0044967085, + -0.059554394, + -0.012864852, + -0.006990424, + 0.04381485, + -0.019732013, + 0.0047561186, + 0.012573004, + -0.084608465, + 0.044956904, + -0.043024026, + -0.008965278, + -0.018597735, + 0.023019703, + -0.058244467, + 0.03946037, + 0.0070161144, + -0.0072559468, + -0.038439214, + 0.007182057, + 0.04479635, + -0.04825045, + 0.020951761, + -0.04177098, + -0.015445904, + 0.0024602767, + 0.019107025, + 0.01879466, + 0.015647886, + -0.03868733, + 0.0010552967, + -0.031725515, + 0.003677792, + -0.008869332, + 0.016350579, + -0.025660282, + -0.0033997998, + -0.053593792, + 0.01300085, + 0.014504953, + -0.04167999, + -0.013626902, + -0.013473784, + 0.04477186, + -0.039079364, + 0.045125194, + -0.038965665, + 0.041032016, + -0.021128332, + -0.00079906755, + 0.0105881365, + -0.023438545, + -0.009942863, + -0.028716002, + 0.031107599, + 0.017214399, + -0.027654208, + 0.013554825, + 0.019173222, + 0.055249233, + 0.00617875, + -0.01951432, + -0.008078177, + -0.045365352, + -0.013410786, + -0.06576592, + 0.0258962, + -0.04870149, + 0.028375717, + 0.02127929, + 0.0074190334, + -0.015849123, + -0.050413407, + 0.027654368, + -0.01447592, + -0.0046318294, + 0.003353468, + 0.02456171, + -0.006699941, + -0.04072025, + 0.030406132, + 0.003700867, + 0.04991202, + 0.043061696, + -0.0014100377, + 0.038879305, + -0.02551224, + -0.03253989, + 0.002335025, + 0.0066566374, + 
0.013019894, + -0.017884579, + 0.03333752, + -0.005329557, + 0.025703372, + 0.01243421, + 0.0015536154, + -0.0011326541, + -0.02956871, + 0.010284604, + -0.0017640645, + 0.030444842, + 0.024831444, + -0.015894072, + -0.017051669, + -0.012481152, + -0.021874228, + 0.032245617, + 0.029441461, + -0.019289171, + 0.015042458, + -0.048809912, + 0.045543794, + -0.025887825, + -0.0017429133, + 0.050035972, + -0.094813764, + -0.026645338, + -0.03496652, + 0.02787559, + -0.009335962, + 0.050203443, + 0.007864018, + 0.008651598, + 0.07439614, + -0.04608253, + 0.008741113, + -0.096183665, + 0.01909248, + 0.02903942, + -0.020657493, + 0.03056416, + 0.025593685, + 0.05326756, + -0.035201855, + -0.0042431992, + 0.047313657, + 0.04643017, + -0.015038583, + -0.03623203, + 0.06975197, + 0.02893981, + -0.012065428, + 0.03489605, + -0.02045082, + 0.08106463, + 0.03046569, + 0.05845714, + 0.038226783, + -0.0039640213, + 0.020310445, + -0.044298742, + -0.011063444, + 0.031646963, + -0.016750742, + 0.06093846, + -0.0024345908, + 0.0137670245, + 0.01068818, + 0.00028172386, + 0.024276268, + 0.007246687, + 0.017009424, + -0.0058112424, + 0.055742696, + 0.0020487534, + 0.0041393945, + -0.002708682, + -0.0111793615, + -0.016895374, + 0.009005465, + 0.025580926, + -0.015197682, + 0.0152440565, + 0.049733438, + 0.00909726, + -0.04997614, + -0.054340348, + 0.047531743, + 0.052675292, + 0.0002477018, + 0.017530492, + -0.04548658, + 0.0034042338, + -0.027109472, + 0.0540901, + 0.05400029, + -0.039156314, + -0.010473526, + 0.036758192, + 0.012307892, + -0.043290082, + 0.021435479, + -0.013614977, + 0.047010962, + 0.061001405, + 0.0067372657, + 0.0227589, + -0.0519168, + 0.012738339, + -0.027280986, + 0.0012095303, + 0.053970173, + 0.011808772, + -0.06391073, + 0.049324006, + -0.016165268, + -0.035052363, + 0.011938826, + 0.033804722, + -0.033935532, + 0.014987266, + -0.03362387, + -0.022560425, + 0.05126289, + -0.008983691, + 0.05116898, + -0.053964064, + -0.038813572, + -0.06834585, + 0.0425859, + 0.029469976, + 0.06586096, + 0.056827266, + -0.028529037, + -0.022799347, + 0.03930842, + 0.009058165, + 0.0029452725, + -0.046222363, + -0.015354657, + 0.020766245, + 0.00544761, + 0.054154783, + 0.024518205, + -0.0762551, + -0.03815425, + 0.030558256, + -0.014623021, + 0.04429291, + -0.02593325, + 0.06950066, + -0.011652937, + 0.00784224, + 0.010082946, + 0.02640965, + 0.044778366, + -0.017441178, + -0.042124864, + 0.030845765, + -0.047991402, + 0.1127873, + 0.11150797, + -0.0745599, + -0.059560712, + -0.00808373, + -0.008904883, + 0.047381986, + -0.03259649, + -0.0034343451, + 0.043409795, + -0.011778097, + 0.017888952, + -0.042976636, + -0.014014427, + 0.013991117, + 0.008008242, + -0.005016844, + 0.053890087, + 0.056538153, + 0.016641492, + -0.011209175, + 0.005071369, + -0.031119458, + -0.012060056, + 0.047321502, + -0.01410517, + -0.06337502, + 0.057011377, + -0.046111424, + -0.022285707, + 0.00068395643, + -0.01453697, + 0.0030104562, + 0.031148981, + 0.029581407, + 0.007647941, + 0.011242783, + -0.026178291, + -0.05194385, + 0.037139274, + 0.026292743, + 0.01298006, + 0.023150109, + 0.06221823, + -0.024437338, + 0.056873403, + 0.027463028, + 0.07723492, + 0.0019251422, + 0.042778768, + -0.026794884, + -0.016140813, + -0.037990715, + 0.0015520528, + 0.01605836, + -0.012476547, + -0.01679565, + 0.027481532, + 0.018949807, + 0.010083091, + -0.01057625, + -0.024935285, + -0.031943906, + -0.051917356, + -0.04344679, + -0.04837223, + -0.009939983, + 0.040695325, + 0.024695948, + 0.063317895, + -0.0018597379, + 0.016552558, + 
-0.047521863, + -0.07224005, + 0.042071674, + 0.016915316, + 0.014148548, + 0.01878253, + -0.026108567, + -0.06437781, + 0.021399872, + 0.011175348, + 0.0033761705, + -0.004680718, + 0.03344319, + 0.0031177911, + 0.053175025, + 0.028025331, + -0.0069551654, + -0.034634676, + -0.012221638, + -0.035786934, + 0.04296345, + -0.01631924, + 0.060271725, + -0.04230959, + -0.0064216405, + 0.0013953961, + -0.041444454, + -0.008569435, + -0.01984154, + -0.061582044, + 0.049848285, + -0.010022811, + -0.07785035, + -0.006366211, + -0.012778517, + -0.037107654, + -0.034078293, + -0.0019027964, + -0.018393178, + -0.031273652, + -0.030624373, + -0.047289733, + -0.055507194, + 0.0149980355, + 0.009802669, + 0.05346352, + 0.011616594, + 0.040882636, + -0.05801636, + -0.018325027, + 0.033699974, + -0.015700053, + -0.018874831, + 0.00975098, + -0.028787887, + -0.010430304, + -0.019937277, + -0.025684841, + -0.017275153, + 0.048182886, + 0.040767677, + -0.006017042, + -0.012711738, + -0.0010345151, + 0.015744662, + 0.023162043, + 0.02130765, + -0.0024493549, + 0.015457228, + 0.037933253, + -0.031316977, + 0.06891338, + 0.005748761, + -0.07730445, + -0.032125294, + 0.036361482, + 0.0061598606, + 0.018043444, + 0.038325332, + -0.036203355, + -0.0123121375, + -0.022851182, + -0.035532467, + -0.041686766, + 0.03709366, + -0.0017735043, + -0.018472947, + -0.045957465, + -0.023627242, + 0.01808581, + 0.015027068, + -0.042559687, + -0.009885546, + 0.057179235, + -0.03215653, + -0.048862357, + -0.012386838, + -0.021847295, + -0.044682942, + 0.040646516, + 0.00038476288, + 0.005513208, + -0.03062349, + 0.011521192, + -0.035988722, + 0.061369143, + -0.020910813, + 0.075483516, + -0.045259465, + -0.02859422, + 0.015579937, + 0.0075254533, + 0.038143836, + -0.045940828, + 0.027484732, + -0.091758996, + -0.048610084, + -0.095563754, + 0.0004537795, + -0.05040322, + 0.02240349, + 0.046084013, + 0.04480506, + 0.037050348, + 1.0597447e-05, + -0.018571958, + 0.009857055, + 0.021747472, + 0.031625595, + -0.03629067, + 0.037058298, + 0.041504655, + -0.03894645, + 0.046530657, + 0.08956203, + 0.05101704, + 0.005822723, + -0.014409921, + 0.0050498573, + 0.039041325, + -0.010459366, + -0.022216242, + -0.07559245, + 0.019515479, + -0.010434134, + -0.040965218, + 0.006768683, + 0.021648958, + 0.059341215, + 0.0044922573, + -0.011139294, + 0.023696495, + -0.04251101, + 0.028621383, + 0.005927879, + 0.05084491, + -0.01525845, + 0.03151167, + 0.008018476, + 0.05309983, + 0.059823282, + -0.02189311, + 0.010798892, + 0.0027545195, + -0.024435053, + 0.042531513, + 0.028011957, + 0.0147431465, + -0.062116392, + 0.032930456, + -0.03597175, + 0.002567075, + -0.030825771, + -0.0070259375, + 0.007989939, + -0.027159046, + -0.0714439, + -0.020082822, + -0.018486606, + 0.01108784, + -0.012602704, + -0.0012252157, + 0.06443626, + 0.036829114, + 0.04501229, + 0.0022744364, + 0.058829524, + -0.008902569, + -0.010049271, + -0.0064951205, + -0.014354489, + -0.044668842, + -0.025392724, + 0.015202658, + 0.020321742, + -0.01176466, + 0.09413702, + -0.0319812, + 0.03219725, + -0.040439297, + -0.019967683, + 0.0164714, + 0.019272799, + 0.02388655, + 0.017886775, + -0.03603167, + -0.023737542, + -0.01898098, + -0.04790894, + -0.036694597, + -0.02994124, + 0.034576166, + -0.05921917, + -0.022381892, + -0.051536635, + -0.05452498, + 0.053339027, + 0.019327087, + 0.012448543, + -0.018923279, + -0.0019192714, + -0.01976354, + 0.032581042, + -0.00695812, + 0.033768184, + -0.028018538, + -0.023666212, + -0.017496848, + 0.023191998, + -0.0502938, + 0.01670451, + 
0.0058311033, + 0.012473936, + 0.023568941, + -0.06854558, + 0.0073930174, + 0.07903637, + -0.024922114, + -0.026363779, + -0.006970082, + -0.007723444, + 0.074576765, + -0.032073244, + -0.013143484, + -0.010095435, + 0.018318929, + 0.008086707, + -0.01570327, + -0.046567768, + 0.0038824868, + -0.027711825 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "nomic-embed-text:latest", + "object": "list", + "usage": { + "prompt_tokens": 6, + "total_tokens": 6 + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-44869b1b.json b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-44869b1b.json new file mode 100644 index 000000000..b5b53d1c6 --- /dev/null +++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-44869b1b.json @@ -0,0 +1,34 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "all-minilm:l6-v2", + "created": 1759793684, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama3.2:3b-instruct-fp16", + "created": 1759791776, + "object": "model", + "owned_by": "library" + } + } + ], + "is_streaming": false + } +} diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-79be7c70.json b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-79be7c70.json new file mode 100644 index 000000000..601f5b5a9 --- /dev/null +++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-79be7c70.json @@ -0,0 +1,25 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "all-minilm:l6-v2", + "created": 1759785110, + "object": "model", + "owned_by": "library" + } + } + ], + "is_streaming": false + } +} diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-7becc84f.json b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-7becc84f.json new file mode 100644 index 000000000..8c0455bda --- /dev/null +++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-7becc84f.json @@ -0,0 +1,70 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nomic-embed-text:latest", + "created": 1755204798, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama-guard3:8b", + "created": 1755125995, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"all-minilm:l6-v2", + "created": 1753804403, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama3.2:3b-instruct-fp16", + "created": 1752697170, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "all-minilm:latest", + "created": 1752691712, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama-guard3:1b", + "created": 1752267588, + "object": "model", + "owned_by": "library" + } + } + ], + "is_streaming": false + } +} diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-e3b0c442.json b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-e3b0c442.json new file mode 100644 index 000000000..1257e8977 --- /dev/null +++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-e3b0c442.json @@ -0,0 +1,15 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [], + "is_streaming": false + } +} diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-e8733dec.json b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-e8733dec.json new file mode 100644 index 000000000..39b63dea4 --- /dev/null +++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-e8733dec.json @@ -0,0 +1,25 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama3.2:3b-instruct-fp16", + "created": 1759791776, + "object": "model", + "owned_by": "library" + } + } + ], + "is_streaming": false + } +} diff --git a/tests/integration/common/recordings/models-bd3df37825f32706c88677a327960bfa47dcf93f2ea6ed882f1186cf4fdda5bb-d5d684a3.json b/tests/integration/common/recordings/models-bd3df37825f32706c88677a327960bfa47dcf93f2ea6ed882f1186cf4fdda5bb-d5d684a3.json new file mode 100644 index 000000000..736a05ef3 --- /dev/null +++ b/tests/integration/common/recordings/models-bd3df37825f32706c88677a327960bfa47dcf93f2ea6ed882f1186cf4fdda5bb-d5d684a3.json @@ -0,0 +1,528 @@ +{ + "test_id": null, + "request": { + "method": "POST", + "url": "https://api.fireworks.ai/inference/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/flux-1-dev-fp8", + "created": 1729532889, + "object": "model", + "owned_by": "fireworks", + "kind": "FLUMINA_BASE_MODEL", + "supports_chat": false, + "supports_image_input": false, + "supports_tools": false + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/flux-kontext-max", + "created": 1750714611, + "object": "model", + "owned_by": "fireworks", + "kind": "FLUMINA_BASE_MODEL", + "supports_chat": true, + "supports_image_input": true, + "supports_tools": false + 
} + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/flux-kontext-pro", + "created": 1750488264, + "object": "model", + "owned_by": "fireworks", + "kind": "FLUMINA_BASE_MODEL", + "supports_chat": true, + "supports_image_input": true, + "supports_tools": false + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "created": 1748467427, + "object": "model", + "owned_by": "sentientfoundation-serverless", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "created": 1739563474, + "object": "model", + "owned_by": "sentientfoundation", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/gpt-oss-120b", + "created": 1754345600, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-30b-a3b-thinking-2507", + "created": 1753916446, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507", + "created": 1753124424, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507", + "created": 1753455434, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-v3-0324", + "created": 1742827220, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 163840 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/kimi-k2-instruct", + "created": 1752259096, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/gpt-oss-20b", + "created": 1754345466, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"accounts/fireworks/models/kimi-k2-instruct-0905", + "created": 1757018994, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-v3", + "created": 1735576668, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "created": 1733442103, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-235b-a22b", + "created": 1745885249, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/glm-4p5-air", + "created": 1754089426, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-r1", + "created": 1737397673, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 163840 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-r1-basic", + "created": 1742306746, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 163840 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-v3p1", + "created": 1755758988, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 163840 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/flux-1-schnell-fp8", + "created": 1729535376, + "object": "model", + "owned_by": "fireworks", + "kind": "FLUMINA_BASE_MODEL", + "supports_chat": false, + "supports_image_input": false, + "supports_tools": false + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/llama-v3p1-405b-instruct", + "created": 1721428386, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/llama4-scout-instruct-basic", + "created": 1743878279, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": 
true, + "supports_tools": true, + "context_length": 1048576 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-30b-a3b", + "created": 1745878133, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/llama-v3p1-70b-instruct", + "created": 1721287357, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-r1-0528", + "created": 1748456377, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 163840 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/mixtral-8x22b-instruct", + "created": 1713375508, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 65536 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-embedding-8b", + "created": 1755707090, + "object": "model", + "owned_by": "fireworks", + "kind": "EMBEDDING_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 40960 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "created": 1753808388, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "created": 1743878495, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": true, + "supports_tools": true, + "context_length": 1048576 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2", + "created": 1743381121, + "object": "model", + "owned_by": "tvergho-87e44d", + "kind": "HF_PEFT_ADDON", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct", + "created": 1743392739, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": true, + "supports_tools": false, + "context_length": 128000 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "created": 1754063588, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + 
"id": "accounts/fireworks/models/deepseek-v3p1-terminus", + "created": 1758586241, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 163840 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/llama-v3p1-8b-instruct", + "created": 1721692808, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct", + "created": 1753211090, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/glm-4p5", + "created": 1753809636, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + } + ], + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/models-7d9446738fd7-d5d684a3.json b/tests/integration/recordings/responses/models-7d9446738fd7-d5d684a3.json new file mode 100644 index 000000000..23d2704e1 --- /dev/null +++ b/tests/integration/recordings/responses/models-7d9446738fd7-d5d684a3.json @@ -0,0 +1,527 @@ +{ + "request": { + "method": "POST", + "url": "https://api.fireworks.ai/inference/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/flux-1-dev-fp8", + "created": 1729532889, + "object": "model", + "owned_by": "fireworks", + "kind": "FLUMINA_BASE_MODEL", + "supports_chat": false, + "supports_image_input": false, + "supports_tools": false + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/flux-kontext-max", + "created": 1750714611, + "object": "model", + "owned_by": "fireworks", + "kind": "FLUMINA_BASE_MODEL", + "supports_chat": true, + "supports_image_input": true, + "supports_tools": false + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/flux-kontext-pro", + "created": 1750488264, + "object": "model", + "owned_by": "fireworks", + "kind": "FLUMINA_BASE_MODEL", + "supports_chat": true, + "supports_image_input": true, + "supports_tools": false + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "created": 1748467427, + "object": "model", + "owned_by": "sentientfoundation-serverless", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "created": 1739563474, + "object": "model", + "owned_by": "sentientfoundation", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 
131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/gpt-oss-120b", + "created": 1754345600, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-30b-a3b-thinking-2507", + "created": 1753916446, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507", + "created": 1753124424, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507", + "created": 1753455434, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-embedding-8b", + "created": 1755707090, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 40960 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-v3-0324", + "created": 1742827220, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 163840 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/kimi-k2-instruct", + "created": 1752259096, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/gpt-oss-20b", + "created": 1754345466, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "created": 1743878495, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": true, + "supports_tools": true, + "context_length": 1048576 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/kimi-k2-instruct-0905", + "created": 1757018994, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-v3", + "created": 1735576668, + "object": 
"model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "created": 1733442103, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-235b-a22b", + "created": 1745885249, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/glm-4p5-air", + "created": 1754089426, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-r1", + "created": 1737397673, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 163840 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/llama-v3p1-8b-instruct", + "created": 1721692808, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-r1-basic", + "created": 1742306746, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 163840 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-v3p1", + "created": 1755758988, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 163840 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/flux-1-schnell-fp8", + "created": 1729535376, + "object": "model", + "owned_by": "fireworks", + "kind": "FLUMINA_BASE_MODEL", + "supports_chat": false, + "supports_image_input": false, + "supports_tools": false + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/llama-v3p1-405b-instruct", + "created": 1721428386, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/llama4-scout-instruct-basic", + "created": 1743878279, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": true, + "supports_tools": true, + "context_length": 1048576 + } + }, + { + 
"__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-30b-a3b", + "created": 1745878133, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/llama-v3p1-70b-instruct", + "created": 1721287357, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-r1-0528", + "created": 1748456377, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 163840 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/mixtral-8x22b-instruct", + "created": 1713375508, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 65536 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "created": 1753808388, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct", + "created": 1743392739, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": true, + "supports_tools": false, + "context_length": 128000 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "created": 1754063588, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2", + "created": 1743381121, + "object": "model", + "owned_by": "tvergho-87e44d", + "kind": "HF_PEFT_ADDON", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": false + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/deepseek-v3p1-terminus", + "created": 1758586241, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 163840 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct", + "created": 1753211090, + "object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 262144 + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "accounts/fireworks/models/glm-4p5", + "created": 1753809636, + 
"object": "model", + "owned_by": "fireworks", + "kind": "HF_BASE_MODEL", + "supports_chat": true, + "supports_image_input": false, + "supports_tools": true, + "context_length": 131072 + } + } + ], + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/models-bd032f995f2a-7becc84f.json b/tests/integration/recordings/responses/models-bd032f995f2a-7becc84f.json new file mode 100644 index 000000000..b44ff0ecc --- /dev/null +++ b/tests/integration/recordings/responses/models-bd032f995f2a-7becc84f.json @@ -0,0 +1,69 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "nomic-embed-text:latest", + "created": 1755204798, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama-guard3:8b", + "created": 1755125995, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "all-minilm:l6-v2", + "created": 1753804403, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama3.2:3b-instruct-fp16", + "created": 1752697170, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "all-minilm:latest", + "created": 1752691712, + "object": "model", + "owned_by": "library" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "llama-guard3:1b", + "created": 1752267588, + "object": "model", + "owned_by": "library" + } + } + ], + "is_streaming": false + } +} diff --git a/tests/integration/vector_io/recordings/0fbf282a067bb1fe2c9fa5c96287b1a0700b6c74372d556c873dda39c603d844.json b/tests/integration/vector_io/recordings/0fbf282a067bb1fe2c9fa5c96287b1a0700b6c74372d556c873dda39c603d844.json new file mode 100644 index 000000000..6f8e7c445 --- /dev/null +++ b/tests/integration/vector_io/recordings/0fbf282a067bb1fe2c9fa5c96287b1a0700b6c74372d556c873dda39c603d844.json @@ -0,0 +1,423 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_retrieve_contents[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "all-minilm:l6-v2", + "input": [ + "This is the content of test file 2" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "all-minilm:l6-v2" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + -0.014871168, + 0.094365, + -0.098275684, + 0.016189486, + 0.072296426, + -0.039229725, + 0.007638039, + 0.035811495, + -0.03784589, + 0.022591105, + 0.15810202, + 0.009195058, + -0.029846655, + -0.06448414, + -0.01898075, + -0.02023675, + -0.07593923, + -0.04666322, + 0.010769107, + 0.033283222, + 0.06951838, + 0.039086174, + -0.009640043, + -0.008601025, + 0.039979056, + 0.02799972, + -0.06578151, + 0.08029443, + 0.0101568075, + -0.07898879, + 0.048795786, + 0.057297125, + 0.025737243, + 0.03572965, + 0.11485981, + 0.030900626, + 0.118485495, + -0.041167885, + -0.019413618, + -0.0009897926, + 0.03717747, + -0.012367201, + 
-0.0026639055, + 0.015703445, + -0.0046827365, + 0.023138778, + 0.012855939, + -0.029367425, + 0.00042996072, + -0.003222942, + -0.055509202, + 0.012830617, + -0.06941755, + -0.011024706, + 0.07149942, + 0.021040803, + 0.0409756, + 0.010087916, + -0.015326204, + 0.06633094, + 0.024846299, + 0.030543685, + -0.036063526, + 0.04786587, + 0.08074621, + -0.051489003, + -0.03944393, + -0.025607359, + -0.030061793, + -0.119378455, + -0.14597124, + -0.0019379344, + 0.008393092, + 0.023913048, + 0.028285578, + 0.017838098, + -0.10575887, + 0.008080291, + 0.06388723, + -0.12506105, + -0.02536782, + -0.11007926, + 0.051198784, + 0.007446184, + -0.030837545, + 0.09254253, + 0.05638562, + -0.0155668175, + -0.031867314, + 0.018337138, + 0.02442871, + -0.042078987, + 0.0038125275, + 0.089955, + -0.008119613, + 0.040103614, + 0.011012824, + 0.044628628, + 0.0791957, + 0.054247666, + -0.027651828, + -0.03190785, + 0.041443683, + 0.041629724, + -0.077835254, + -0.09937542, + 0.029904107, + -0.05434366, + 0.07058962, + -0.04535761, + 0.03365359, + -0.061656676, + -0.018105442, + -0.07228336, + 0.035377987, + -0.03161877, + -0.020589713, + 0.058485094, + -0.049225487, + 0.03934316, + 0.08550028, + -0.029991213, + -0.05576064, + -0.029334918, + -0.053031918, + -0.061839186, + 0.08176057, + -3.3282106e-33, + 0.00018265574, + -0.09808404, + -0.00554673, + 0.13180184, + 0.026467713, + -0.03976283, + 0.010410568, + 0.022475285, + -0.07190717, + 0.005138454, + -0.021325583, + -0.1046733, + 0.0020021838, + 0.023773609, + -0.057499945, + -0.011727483, + -0.020912478, + 0.026353713, + 0.01779019, + -0.0148312645, + 0.064687304, + 0.045060385, + -0.029312065, + -0.08633001, + -0.026792597, + 0.014552106, + 0.004505434, + -0.06774755, + 0.034052122, + 0.013713737, + -0.0075813113, + -0.059718475, + -0.016189422, + 0.044314116, + 0.026844766, + 0.026430624, + 0.024091395, + -0.0032406747, + -0.075288124, + 0.032822173, + 0.027104331, + -0.026295068, + 0.04316082, + -0.010091815, + 0.034184698, + -0.08266358, + -0.020962045, + -0.00719584, + 0.068549044, + 0.005033586, + 0.0017975906, + 0.06465498, + 0.05990613, + -0.012483792, + 0.024451919, + 0.021659598, + -0.0046074707, + -0.004559902, + 0.002713282, + 0.062373567, + 0.0035651235, + 0.06017224, + -0.062707886, + 0.039937016, + -0.0064443815, + -0.041358124, + -0.045459975, + -0.1090475, + 0.08058783, + 0.055110224, + -0.05126053, + -0.05976516, + 0.037940193, + 0.015456569, + -0.024956519, + -0.037877902, + -0.006799, + 0.031685203, + -0.036858797, + -0.055584695, + -0.048513155, + -0.07101657, + -0.041681714, + -0.04429727, + -0.09584418, + -0.060873836, + 0.008867621, + -0.106438614, + 0.040050562, + -0.084729105, + 0.018111277, + 0.010153493, + -0.08883196, + -0.063969284, + 0.08611972, + 1.4074298e-33, + 0.03433739, + 0.037653737, + -0.05348675, + 0.0015385789, + 0.026684077, + 0.026603375, + 0.07006387, + -0.034265522, + -0.018221779, + 0.10960259, + 0.013464475, + -0.008325532, + 0.019438146, + -0.039553005, + 0.03469477, + -0.0123773115, + -0.013288484, + -0.048081715, + -0.019539693, + -0.0033996427, + -0.024453517, + 0.061505664, + 0.119236834, + 0.026294904, + -0.01607055, + -0.011499089, + 0.04267117, + 0.0295908, + 0.022084564, + 0.007893738, + 0.052055445, + 0.05781507, + -0.13408813, + 0.01778491, + 0.021400984, + -0.12113228, + 0.10535695, + -0.07358604, + -0.013651957, + 0.04049295, + 0.054150987, + 0.0987462, + 0.0110208625, + 0.040327504, + 0.034936633, + 0.10400846, + 0.12958324, + -0.024531014, + 0.002284699, + -0.044239815, + 0.049778443, + 
-0.055788964, + 0.015235888, + 0.0034493478, + -0.02607555, + 0.060282644, + -0.028004775, + 0.040875163, + -0.023749253, + 0.002289086, + 0.04982698, + 0.046928305, + -0.064160004, + 0.013701618, + 0.015511878, + -0.054725982, + -0.0459802, + 0.03258067, + 0.027034523, + 0.01643672, + -0.041782584, + -0.03698569, + -0.023043923, + -0.07073365, + 0.028486207, + 0.0017764921, + -0.03352676, + -0.009977863, + 0.024488676, + -0.01789395, + 0.029737154, + -0.026266927, + -0.03567072, + 0.07469971, + 0.028393274, + -0.029625034, + -0.01053128, + 0.09147493, + -0.018718474, + 0.0012933073, + -0.021214467, + 0.07475739, + -0.007773536, + 0.048597455, + 0.005216022, + -1.6914717e-08, + -0.05724563, + -0.0938908, + -0.034359876, + -0.037500683, + -0.020235153, + 0.06142227, + -0.042273093, + -0.008759724, + -0.009908796, + 0.016232042, + -0.014239323, + 0.024709346, + -0.030538557, + -0.05391127, + -0.051778477, + 0.01277344, + 0.0036140021, + -0.012569925, + -0.025041323, + -0.0203936, + 0.025865255, + 0.010908398, + 0.027834684, + 0.009661084, + -0.006598172, + 0.07860872, + 0.054516125, + 0.042956624, + -0.06275145, + -0.025701547, + 0.08085865, + 0.030041302, + 0.02248997, + -0.0840195, + 0.00029938898, + 0.10966559, + 0.118907265, + 0.063014604, + 0.037847042, + 0.032069027, + -0.05345487, + -0.022730324, + 0.0071888734, + 0.037573762, + -0.020178014, + -0.090167634, + -0.07191704, + -0.02604166, + -0.043885063, + -0.14087014, + -0.017230472, + -0.012063355, + -0.046736836, + 0.039048597, + -0.060394738, + 0.022166032, + 0.025670663, + 0.022949725, + -0.06707243, + -0.014654702, + 0.057985142, + 0.10511708, + 0.05698323, + -0.017205814 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "all-minilm:l6-v2", + "object": "list", + "usage": { + "prompt_tokens": 8, + "total_tokens": 8 + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/vector_io/recordings/4b8ce5031f00e754bbb6e1f55109ae8f46ac7086afb48820a36c41a3cb994cb9.json b/tests/integration/vector_io/recordings/4b8ce5031f00e754bbb6e1f55109ae8f46ac7086afb48820a36c41a3cb994cb9.json new file mode 100644 index 000000000..aa208aa69 --- /dev/null +++ b/tests/integration/vector_io/recordings/4b8ce5031f00e754bbb6e1f55109ae8f46ac7086afb48820a36c41a3cb994cb9.json @@ -0,0 +1,423 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_retrieve_contents[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "all-minilm:l6-v2", + "input": [ + "This is the content of test file 1" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "all-minilm:l6-v2" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + -0.029406669, + 0.08920982, + -0.11326726, + 0.0065823817, + 0.07725067, + -0.036890104, + 0.030436223, + 0.041454185, + -0.049156666, + 0.018258564, + 0.14662577, + 0.01744915, + -0.012837422, + -0.06889876, + -0.039401636, + -0.038800705, + -0.08963421, + -0.059656583, + 0.001375945, + 0.045138627, + 0.042796962, + 0.053700265, + -0.035706885, + 0.010138017, + 0.060920056, + 0.017344126, + -0.05633907, + 0.063370295, + 0.0021257724, + -0.083796844, + 0.050487563, + 0.047987595, + 0.069071226, + 0.049588464, + 0.117036626, + 0.05339311, + 0.10129953, + -0.048230153, + 
-0.014987975, + 0.0250915, + 0.031392053, + -0.008863942, + 0.0073650074, + -0.0009767569, + -0.016403567, + 0.015523393, + -0.010998956, + -0.014870063, + 0.0061682137, + -0.0017961137, + -0.022682818, + 0.018210242, + -0.07757007, + -0.0015845516, + 0.069547005, + 0.000419109, + 0.038414054, + 0.005823485, + -0.028931383, + 0.07009549, + -0.0018009909, + 0.033516172, + -0.014593847, + 0.03922457, + 0.08240545, + -0.050596908, + -0.039732855, + -0.024425076, + -0.015055329, + -0.11705068, + -0.15979129, + -0.008256823, + -0.0100719705, + 0.03266482, + 0.0029998205, + 0.0316428, + -0.094554916, + 0.017661797, + 0.058996264, + -0.119718134, + -0.027414676, + -0.09155906, + 0.040038, + 0.01091849, + -0.029446004, + 0.10225186, + 0.06583262, + -0.003439552, + -0.009694834, + 0.016906522, + 0.023685955, + -0.032616187, + -0.010238839, + 0.07891618, + -0.007330681, + 0.05238444, + 0.00943625, + 0.042121, + 0.08491511, + 0.049208272, + -0.01868227, + -0.013585418, + 0.06727199, + 0.084571496, + -0.103213035, + -0.08387524, + 0.03641842, + -0.047227863, + 0.057315867, + -0.04463932, + 0.006783099, + -0.08934107, + -0.015040418, + -0.08107057, + 0.013285569, + -0.060907867, + -0.042128306, + 0.057306163, + -0.058711898, + 0.04628304, + 0.070194095, + -0.041729517, + -0.0338408, + -0.012369257, + -0.044708908, + -0.059450094, + 0.08251312, + -3.443368e-33, + 0.0121309515, + -0.11084454, + -0.020510655, + 0.10916455, + 0.033683147, + -0.02845083, + 0.024345158, + 0.034192592, + -0.08367815, + 0.0064610844, + -0.00912456, + -0.0663567, + -0.0028754657, + 0.008272698, + -0.09166764, + 0.0089771375, + -0.03963948, + 0.019947624, + -0.01321528, + -0.019034218, + 0.051933073, + 0.028107261, + -0.039153125, + -0.080395184, + -0.050503474, + 0.02060341, + -0.012718284, + -0.046732575, + 0.017907938, + -0.0028334607, + -0.011695137, + -0.05667005, + -0.043894444, + 0.034919597, + 0.022352098, + 0.046777196, + 0.045085873, + -0.008840106, + -0.06373453, + 0.036720857, + 0.012829601, + -0.035169926, + 0.046209145, + -0.014361767, + 0.03706697, + -0.056797564, + -0.06310496, + 0.010818958, + 0.047810175, + 0.0029118094, + -0.003235893, + 0.061511047, + 0.072056666, + -0.03286638, + 0.005070082, + 0.021947902, + -0.017779002, + -0.022738373, + -0.021926457, + 0.047074158, + 0.010847615, + 0.05539702, + -0.07119971, + 0.033833236, + 0.012342855, + -0.047586687, + -0.026776271, + -0.09885727, + 0.10053448, + 0.036877092, + -0.07049897, + -0.059692938, + 0.016129492, + -0.0016443401, + -0.026804024, + -0.013527272, + -0.015385511, + 0.055627547, + -0.060485132, + -0.055540122, + -0.04329072, + -0.07097361, + -0.04857043, + -0.03726256, + -0.09059366, + -0.036855534, + 0.024561211, + -0.10113953, + 0.056738112, + -0.10995085, + 0.042282794, + 0.014222368, + -0.07067843, + -0.05902307, + 0.06426122, + 1.6036318e-33, + 0.037851896, + 0.032911286, + -0.04029648, + -0.00049357174, + 0.028011942, + 0.048672136, + 0.07279598, + -0.027471887, + -0.02847654, + 0.114492, + 0.001777095, + -0.009519909, + 0.0025862327, + -0.056408145, + 0.023462169, + -0.006209674, + -0.010567065, + -0.05877587, + -0.032393616, + 0.011836781, + -0.038905054, + 0.05516299, + 0.09564333, + 0.028543225, + -0.023832332, + -0.0015711841, + 0.047049087, + 0.03128219, + 0.02811091, + 0.007177092, + 0.055283513, + 0.06574452, + -0.1020208, + 0.021213628, + 0.020237882, + -0.10449357, + 0.09608935, + -0.06253181, + 0.015293753, + 0.042053986, + 0.06105009, + 0.0909162, + 0.018404186, + 0.031023262, + 0.03562763, + 0.112073965, + 0.10124763, + 
-0.007683015, + 0.013140281, + -0.042280227, + 0.051135287, + -0.02950743, + 0.027794402, + -0.010734668, + -0.011067552, + 0.058104575, + -0.009284788, + 0.056184508, + -0.040822964, + 0.010282754, + 0.0374409, + 0.054198533, + -0.061418086, + 0.030569963, + 0.0023648597, + -0.054184474, + -0.020570045, + 0.012422129, + 0.025696559, + -0.007607385, + -0.026194826, + -0.024159024, + 0.0012979766, + -0.07461716, + 0.051458035, + -0.004183808, + -0.040804464, + -0.023975441, + 0.009455526, + -0.0018798193, + 0.03668693, + -0.019319497, + -0.06195781, + 0.06456675, + 0.040328216, + -0.010790134, + 0.013190221, + 0.09067539, + -0.0051480443, + 0.013312647, + -0.029548675, + 0.07769003, + 0.0027328292, + 0.04533781, + -0.0017606319, + -1.661594e-08, + -0.040610366, + -0.09883059, + -0.05522113, + -0.02916469, + -0.019305382, + 0.088138185, + -0.038325552, + -0.03327639, + -0.012629364, + 0.006948921, + 0.010438818, + 0.026771523, + -0.040855426, + -0.03958403, + -0.051137064, + -0.016159322, + -0.020525131, + -0.023726366, + -0.013322245, + -0.008097836, + 0.028000915, + 0.02806969, + 0.015645925, + -0.0043166955, + 0.0054488196, + 0.06720413, + 0.068473674, + 0.07172716, + -0.06339439, + -0.02540609, + 0.08468492, + 0.041936778, + 0.021067144, + -0.07596481, + 0.017143335, + 0.1260291, + 0.121315174, + 0.08431059, + 0.040587336, + 0.036687315, + -0.04717, + -0.022659328, + -0.006820436, + 0.005210712, + -0.033785924, + -0.08449115, + -0.0844501, + -0.03192747, + -0.036649443, + -0.13791409, + -0.036417518, + -0.00080547476, + -0.047578912, + 0.038795993, + -0.06757743, + 0.016941966, + 0.036312684, + 0.0125779435, + -0.058240637, + 0.004471269, + 0.03226526, + 0.09821741, + 0.053010236, + -0.016268 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "all-minilm:l6-v2", + "object": "list", + "usage": { + "prompt_tokens": 8, + "total_tokens": 8 + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/vector_io/recordings/506216767e53ce1a6ef47637a97d4baa865eda04d9d92e418a7e58da7be1bc2b.json b/tests/integration/vector_io/recordings/506216767e53ce1a6ef47637a97d4baa865eda04d9d92e418a7e58da7be1bc2b.json new file mode 100644 index 000000000..fa799da03 --- /dev/null +++ b/tests/integration/vector_io/recordings/506216767e53ce1a6ef47637a97d4baa865eda04d9d92e418a7e58da7be1bc2b.json @@ -0,0 +1,423 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_create_and_retrieve[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "all-minilm:l6-v2", + "input": [ + "This is batch test file 1" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "all-minilm:l6-v2" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + -0.009745733, + 0.03363038, + -0.10852256, + 0.026609829, + -0.0060599064, + -0.020473678, + 0.0692486, + 0.032276765, + -0.11532835, + -0.0005207133, + 0.11814916, + 0.0119809555, + 0.03685765, + -0.10744223, + -0.046515625, + 0.0015449532, + -0.06319664, + -0.04640812, + -0.037318822, + -0.025718328, + -0.00026058854, + -0.011890766, + -0.050925612, + 0.014111713, + 0.029467698, + 0.006379121, + -0.012013293, + -0.0024293982, + -0.044318773, + -0.08100101, + 0.02009568, + 0.055713937, + 0.078816675, + 0.054973654, + 
0.20367871, + -0.004309458, + 0.03877001, + 0.03825522, + -0.002538199, + -0.0007973801, + 0.044761047, + -0.054529082, + -0.008856888, + -0.04078078, + 0.011367262, + -0.022404457, + -0.06209053, + 0.02558725, + -0.0034454092, + -0.03743928, + -0.062026348, + -0.030812219, + -0.034592565, + -0.014926672, + 0.018588377, + 0.013435887, + 0.08169151, + 0.053658403, + -0.03557856, + 0.033325985, + -0.01637577, + -0.0222152, + -0.039247517, + 0.00094368146, + 0.10228945, + -0.04305617, + -0.052200828, + -0.02007385, + 0.054805383, + -0.08231377, + -0.14736547, + 0.048954617, + -0.0212168, + 0.02872658, + -0.0671409, + 0.021436114, + -0.023599947, + 0.03677982, + 0.010577411, + -0.0966004, + -0.06367233, + -0.10277648, + 0.0273993, + -0.06292906, + -0.046344172, + 0.039919835, + 0.02682899, + 0.025460077, + -0.013083559, + -0.002667712, + -0.016529463, + 0.012605053, + -0.0064383023, + 0.015841383, + -0.01710707, + 0.12320292, + -0.0077660284, + 0.05845043, + 0.07362552, + 0.038426086, + 0.004742023, + -0.0155985365, + 0.01418979, + 0.07865995, + -0.026352523, + -0.037174653, + 0.06787817, + -0.060126718, + 0.06111402, + -0.034931272, + -0.009446326, + -0.006150886, + 0.02892313, + -0.09361577, + 0.0335364, + -0.09088912, + 0.009241144, + 0.07092964, + -0.08954648, + 0.04494549, + 0.040462427, + -0.04167353, + 0.0076030386, + -0.0066417656, + -0.07275736, + -0.043690544, + 0.07685007, + -1.0508795e-33, + -0.019583685, + -0.13087204, + -0.03574564, + 0.070223756, + 0.08133056, + -0.009436003, + 0.046778366, + 0.03478148, + -0.09441185, + -0.040857755, + -0.02127058, + -0.106959894, + 0.024023255, + 0.022780996, + -0.09042505, + -0.035755932, + 0.011359196, + 0.050059184, + 0.0050815986, + -0.07676938, + 0.05453651, + 0.04191775, + -0.009206564, + -0.022437057, + -0.04617258, + -0.038608693, + -0.00036489012, + -0.025092375, + 0.039146807, + -0.0072839926, + 0.03675482, + -0.011301064, + -0.08863303, + 0.059421506, + 0.015851071, + 0.033407707, + 0.056883834, + -0.01203776, + 0.027333334, + -0.009560535, + -0.05030555, + -0.009787559, + 0.023205005, + -0.007937716, + 0.003991047, + -0.036422852, + -0.06979188, + 0.046075627, + 0.056377746, + 0.0071927872, + -0.00020658698, + 0.017678235, + 0.023745935, + -0.0031295705, + 0.016370842, + 0.027585855, + -0.03440131, + -0.05594279, + 0.036442764, + 0.03577988, + -0.005324585, + 0.015240975, + -0.09071462, + 0.072764605, + 0.02343818, + -0.093097225, + 0.05842133, + -0.061913762, + 0.045556016, + 0.07639311, + -0.035199754, + -0.009256856, + 0.038682748, + -0.040795818, + 0.017686425, + -0.025513103, + 0.06860537, + 0.085520275, + -0.1023457, + -0.0036474275, + -0.014826131, + -0.05045756, + -0.09065474, + -0.076476775, + -0.008538021, + -0.04111943, + -0.035473913, + -0.061549038, + 0.114327826, + -0.09601482, + 0.022990143, + 0.0022396755, + -0.023026146, + -0.028128328, + 0.07969127, + -4.1765383e-34, + 0.07866384, + 0.11484068, + 0.016687382, + 0.009315677, + 0.01664128, + 0.024303248, + 0.046507504, + -0.043804675, + -0.09136995, + 0.106353745, + -0.06948852, + 0.018747667, + 0.0053492193, + -0.033229355, + 0.042339083, + -0.0017468681, + 0.05323157, + 0.0058223205, + -0.05331342, + 0.016506517, + -0.02325185, + 0.097519755, + -0.0045558517, + 0.08866843, + -0.028221445, + -0.012007969, + -0.009742725, + 0.061458003, + 0.01574456, + -0.00039456616, + 0.02444834, + 0.065891184, + -0.054779086, + 0.04863689, + 0.043890025, + -0.062467597, + 0.07615393, + 0.0067509366, + 0.019150084, + 0.06994535, + 0.027900916, + 0.08902746, + -0.027433047, + 
0.031390887, + 0.02271287, + 0.08119532, + 0.06855678, + 0.0023552915, + -0.06764184, + 0.00704173, + -0.034521427, + -0.053785548, + -0.03075216, + 0.007947864, + -0.025317406, + -0.040664013, + 0.036144093, + 0.017730465, + -0.040179063, + 0.013665757, + 0.004815376, + 0.009095556, + 0.0072483593, + 0.012753351, + -0.047865536, + -0.046072423, + -0.014048283, + 0.031082962, + -0.034945205, + -0.023550391, + 0.033062257, + -0.022966444, + 0.007744228, + 0.015939556, + -0.0012224894, + 0.0010534802, + -0.015109, + -0.021597888, + -0.029862719, + 0.03983828, + 0.062536344, + 0.0106168175, + -0.027220478, + 0.02410377, + -0.0023566757, + 0.085310005, + 0.04843323, + 0.090823516, + 0.005126319, + 0.020297319, + -0.01739127, + 0.047677357, + 0.11080086, + 0.030030197, + 0.029773563, + -1.5454503e-08, + -0.03580758, + -0.12177604, + 0.019753791, + 0.05854353, + -0.01590761, + 0.085781366, + -0.09558486, + -0.0016744126, + 0.00773199, + -0.04790156, + 0.01175936, + 0.006536077, + -0.032027386, + 0.0031026274, + -0.07580574, + -0.039700802, + -0.00170645, + -0.070955865, + 0.043680355, + 0.029966798, + 0.0039943648, + 0.031923376, + 0.08119928, + 0.038820695, + 0.013302812, + 0.041675337, + 0.044349737, + 0.060403902, + -0.1058191, + -0.05287386, + 0.050275758, + 0.039101604, + 0.0599918, + -0.025067834, + -0.019554066, + 0.06748813, + 0.12508559, + 0.059007537, + -0.019899847, + -0.030194808, + -0.046559453, + 0.034567222, + -0.021644907, + -0.03327634, + -0.0075667608, + -0.100658834, + -0.0639619, + -0.055270903, + -0.0111757815, + -0.11671873, + -0.07208087, + 0.023208033, + 0.027215267, + 0.063635156, + -0.05858023, + 0.020345282, + 0.018325811, + -0.0036095325, + 0.006916675, + 0.06541716, + 0.009575581, + 0.046839867, + 0.0070611075, + -0.09470841 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "all-minilm:l6-v2", + "object": "list", + "usage": { + "prompt_tokens": 6, + "total_tokens": 6 + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/vector_io/recordings/54f7bec4d7073965af5f612d096c1c82f2602f796edcdbf8c9813a5a3a82825b.json b/tests/integration/vector_io/recordings/54f7bec4d7073965af5f612d096c1c82f2602f796edcdbf8c9813a5a3a82825b.json new file mode 100644 index 000000000..5fc5d5051 --- /dev/null +++ b/tests/integration/vector_io/recordings/54f7bec4d7073965af5f612d096c1c82f2602f796edcdbf8c9813a5a3a82825b.json @@ -0,0 +1,39 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_retrieve_contents[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]", + "request": { + "method": "POST", + "url": "http://localhost:11434/api/ps", + "headers": {}, + "body": {}, + "endpoint": "/api/ps", + "model": "" + }, + "response": { + "body": { + "__type__": "ollama._types.ProcessResponse", + "__data__": { + "models": [ + { + "model": "all-minilm:l6-v2", + "name": "all-minilm:l6-v2", + "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef", + "expires_at": "2025-10-06T16:41:45.231544-07:00", + "size": 590204928, + "size_vram": 590204928, + "details": { + "parent_model": "", + "format": "gguf", + "family": "bert", + "families": [ + "bert" + ], + "parameter_size": "23M", + "quantization_level": "F16" + } + } + ] + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/vector_io/recordings/8158c78a51cf32f35b849dd054077757f7abbd584a52b47259fb0a903922eec0.json 
b/tests/integration/vector_io/recordings/8158c78a51cf32f35b849dd054077757f7abbd584a52b47259fb0a903922eec0.json new file mode 100644 index 000000000..d95380b90 --- /dev/null +++ b/tests/integration/vector_io/recordings/8158c78a51cf32f35b849dd054077757f7abbd584a52b47259fb0a903922eec0.json @@ -0,0 +1,423 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_create_and_retrieve[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "all-minilm:l6-v2", + "input": [ + "This is batch test file 0" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "all-minilm:l6-v2" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + 0.020637129, + 0.048547756, + -0.12516363, + 0.01991118, + -0.006535745, + -0.017178575, + 0.027727997, + 0.032170568, + -0.07302972, + 0.008939002, + 0.11493648, + 0.0058907545, + 0.0058539375, + -0.077171296, + -0.06883132, + 0.0039748913, + -0.046849657, + -0.072902456, + -0.010890429, + -0.0019311906, + -0.011614798, + 0.003689495, + -0.03695609, + -0.009029024, + 0.017461002, + -0.004713484, + -0.010254731, + -0.026636763, + -0.026125714, + -0.046913657, + 0.017024228, + 0.0713477, + 0.07881179, + 0.03789051, + 0.21716279, + -0.0077837943, + 0.04686894, + 0.020414647, + 7.314368e-05, + 0.0103133675, + 0.059848394, + -0.04321678, + -0.011937493, + -0.021149047, + 0.021315353, + -0.00072822213, + -0.046116166, + -0.0046820445, + 0.016943695, + -0.03249135, + -0.055184096, + 4.1543382e-05, + -0.034172166, + -0.023247559, + 0.020267941, + 0.012827845, + 0.065036125, + 0.07180022, + -0.013490698, + 0.06376413, + -0.017730094, + -0.01806601, + -0.034191083, + 0.008955718, + 0.098446764, + -0.0061265854, + -0.06815829, + -0.039525956, + 0.060588058, + -0.094874755, + -0.11774928, + 0.019538416, + -0.014697532, + 0.04773719, + -0.061298393, + 0.030337377, + -0.0022184649, + 0.019007793, + 0.024370821, + -0.07063359, + -0.07582954, + -0.10816809, + 0.031845964, + -0.057830192, + -0.04169559, + 0.0752806, + 0.019289386, + 0.028845867, + 0.0077010663, + 0.013930818, + -0.067987345, + 0.012679873, + -0.07907268, + 0.0143718915, + -0.021433424, + 0.11880779, + -0.016258432, + 0.07099568, + 0.035778854, + 0.028776454, + 0.013304291, + -0.05192297, + 0.026758345, + 0.10282426, + -0.003306269, + -0.03239622, + 0.083044045, + -0.0412691, + 0.043435257, + -0.043423533, + -0.013239603, + -0.0029038454, + 0.038365215, + -0.10401672, + 0.012744224, + -0.122984126, + -0.008942817, + 0.06162198, + -0.120285526, + 0.043005254, + 0.04814879, + -0.036352232, + -0.003885529, + -0.018503373, + -0.088186465, + -0.0031517749, + 0.09290919, + -1.1695094e-33, + -0.015589721, + -0.13189551, + 0.008088751, + 0.06899503, + 0.07353927, + -0.030646399, + 0.05110342, + 0.03081624, + -0.07850498, + -0.021147482, + 0.00017823944, + -0.10502706, + 0.030078856, + 0.02572523, + -0.068158925, + -0.025015576, + 0.021830637, + 0.049748335, + 0.01520941, + -0.080153145, + 0.06796621, + 0.021865685, + -0.034017574, + -0.030821111, + -0.048006665, + 0.0005615041, + -0.0137883695, + -0.04500587, + 0.015368256, + -0.0043663937, + 0.037706476, + 0.0049090013, + -0.06216566, + 0.03060772, + 0.030548712, + 0.029262561, + 0.020701125, + 0.0056516766, + 0.010610447, + 
0.019530762, + -0.05664136, + -0.022654066, + -0.0010107337, + -0.020805702, + -0.012242364, + -0.05591731, + -0.049421698, + 0.024721064, + 0.05803342, + 0.010474127, + -0.008790625, + 0.025362873, + 0.020258408, + 0.004368581, + -0.01018003, + 0.012385932, + -0.037656736, + -0.05642639, + 0.020923307, + 0.022813153, + -0.005735433, + 0.015326356, + -0.108707875, + 0.048076265, + 0.023256551, + -0.10311626, + 0.061980195, + -0.07340407, + 0.051583096, + 0.07360003, + -0.029443117, + -0.014564469, + 0.042043358, + -0.020252181, + 0.0147808045, + -0.0285806, + 0.07891856, + 0.056849223, + -0.106308356, + 0.0197874, + 0.0269322, + -0.04749746, + -0.066681586, + -0.10474516, + 0.012599429, + -0.056163482, + -0.04901015, + -0.04571026, + 0.09704481, + -0.105899766, + 0.044303197, + -0.020125533, + -0.0368709, + -0.015417924, + 0.042297333, + -8.289866e-35, + 0.07415767, + 0.10998298, + -0.016995763, + 0.01066263, + -0.0012327223, + 0.028000232, + 0.0714317, + -0.02320065, + -0.07778205, + 0.11864239, + -0.016559754, + 0.037961867, + 0.02930022, + -0.008237686, + 0.059777655, + 0.008086454, + 0.02075205, + 0.025284613, + -0.055471037, + 0.0073576584, + -0.013398135, + 0.11896543, + -0.014611002, + 0.07691816, + -0.019711656, + -0.01920917, + -0.004744884, + 0.08173054, + 0.019665759, + -0.013193461, + 0.06215852, + 0.07420406, + -0.073212065, + 0.036052067, + 0.07328616, + -0.057373393, + 0.08346425, + 0.018834447, + 0.03309735, + 0.041197047, + 0.033917964, + 0.09151449, + -0.051731598, + 0.049615093, + 0.01124018, + 0.06661862, + 0.07268375, + -0.013245848, + -0.039673895, + -0.012173254, + 0.0017787582, + -0.05746287, + -0.013884767, + 0.020205025, + -0.029692367, + -0.031010685, + 0.0149556715, + 0.026381323, + -0.025382591, + 0.0074336748, + -0.00949915, + 0.015655186, + -0.0012397208, + -0.0032508406, + -0.046632554, + -0.0030316226, + -0.007273208, + 0.064231135, + -0.034431897, + -0.06433184, + 0.045421343, + -0.010773523, + -0.017881984, + 0.010312532, + -0.024369273, + -0.008478495, + -0.02457377, + -0.0263535, + -0.027263613, + 0.047060315, + 0.08128726, + 0.0045517692, + -0.010821656, + 0.026526682, + 0.018961033, + 0.059243083, + 0.001561823, + 0.09838158, + 0.00822081, + 0.008796511, + -0.0060577285, + 0.028892087, + 0.08253284, + 0.049560018, + 0.023363132, + -1.498271e-08, + -0.036891207, + -0.10629833, + 0.030452948, + 0.049268734, + -0.0030453752, + 0.07413954, + -0.07043819, + -0.034285706, + -0.009679971, + -0.046219327, + 0.013510038, + -0.018686565, + -0.048570327, + 0.0028313443, + -0.06190722, + -0.053201936, + 0.0060967463, + -0.043467365, + 0.042226154, + 0.03455835, + -0.0375257, + 0.023590367, + 0.054896712, + 0.029878648, + 0.019286606, + 0.026097741, + 0.06938145, + 0.06272366, + -0.09566521, + -0.07481147, + 0.025204772, + 0.039396077, + 0.036375154, + -0.01104443, + -0.028223084, + 0.111878626, + 0.13400707, + 0.06680113, + -0.011737675, + -0.03585406, + -0.07978788, + 0.032793757, + -0.0021075818, + -0.028365146, + -0.042218164, + -0.08132239, + -0.0753423, + -0.043771427, + -0.015633285, + -0.14193884, + -0.055949364, + 0.025526602, + -0.023186589, + 0.061106257, + -0.056208834, + 0.00838827, + 0.014720396, + -0.014650135, + -0.012830787, + 0.08434067, + 0.024660436, + 0.05366935, + 0.005782819, + -0.10599063 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "all-minilm:l6-v2", + "object": "list", + "usage": { + "prompt_tokens": 6, + "total_tokens": 6 + } + } + }, + "is_streaming": false + } +} diff --git 
a/tests/integration/vector_io/recordings/943a7db9bab0934c95417e8befe710b364496c1ee21a75258205830e1df7221b.json b/tests/integration/vector_io/recordings/943a7db9bab0934c95417e8befe710b364496c1ee21a75258205830e1df7221b.json new file mode 100644 index 000000000..c4338e8df --- /dev/null +++ b/tests/integration/vector_io/recordings/943a7db9bab0934c95417e8befe710b364496c1ee21a75258205830e1df7221b.json @@ -0,0 +1,423 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_cancel[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "all-minilm:l6-v2", + "input": [ + "This is batch cancel test file 0 with substantial content" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "all-minilm:l6-v2" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + -0.010706507, + 0.11740309, + -0.070396945, + 0.036590267, + 0.03445541, + -0.037278067, + 0.033794403, + -0.013823747, + -0.032249726, + 0.039381154, + 0.09738964, + 0.043944314, + -0.015195914, + -0.08339148, + -0.12092182, + -0.0144716315, + -0.06525938, + 0.008907217, + -0.016506711, + -0.011929026, + -0.0519942, + 0.07381637, + 0.028294124, + 0.056386005, + 0.028838597, + 0.02860147, + -0.046813786, + -0.018329943, + -0.037620317, + -0.06344129, + 0.037448265, + 0.0807444, + 0.08218735, + -0.018610513, + 0.16465282, + 0.006478139, + 0.009052014, + 0.024081843, + 0.04604129, + -0.016105218, + 0.050088186, + -0.014189308, + -0.055208918, + -0.024689473, + 0.009216049, + 0.0032953622, + -0.08004139, + -0.050898325, + 0.030319132, + 0.0038868543, + -0.03242241, + -0.008002084, + -0.05405017, + 0.0034951256, + 0.026613077, + -0.03749797, + 0.074383445, + 0.05947148, + -0.037571322, + 0.07424358, + -0.031258598, + -0.010979168, + -0.115162514, + 0.016076973, + 0.12323825, + 0.057677355, + -0.08872208, + -0.028623635, + 0.05342226, + -0.060159575, + -0.07479101, + -0.01794232, + -0.0049816607, + 0.08948416, + -0.042007502, + 0.0925552, + -0.016678093, + 0.013261441, + -0.0068968083, + 0.00078877964, + -0.070652686, + -0.14053895, + 0.054617904, + -0.064937904, + -0.036082774, + 0.04364618, + 0.039191015, + 0.009325763, + 0.055350192, + 0.007441803, + -0.04520714, + 0.0070686075, + 0.029522296, + 0.016590035, + -0.020568646, + 0.083674796, + 0.0076218233, + 0.006881344, + 0.013654858, + 0.03697504, + 0.04504176, + -0.012595865, + -0.006368664, + -0.006188894, + -0.02347456, + -0.014876863, + 0.07330545, + -0.008524341, + 0.03080002, + -0.079184264, + -0.002168809, + -0.04496155, + 0.02353669, + -0.061784163, + 0.019026963, + -0.034334134, + 0.07823938, + 0.086644776, + -0.100164026, + 0.00979978, + 0.043132447, + -0.00027732752, + -0.007950898, + -0.03439145, + -0.07176784, + -0.010847044, + 0.10318583, + 1.28398045e-33, + -0.057539165, + -0.10064088, + -0.036363184, + 0.070467934, + 0.12267441, + 0.023121687, + 0.036528632, + 0.043095388, + -0.053614546, + 0.034320176, + -0.015772322, + -0.07880764, + 0.019716268, + 0.017762613, + -0.094458655, + -0.08139035, + 0.027233537, + 0.07888667, + -0.024265131, + -0.054107342, + 0.11021126, + -0.016241824, + -0.05417309, + -0.028439889, + -0.027373016, + -0.01668086, + -0.031238388, + -0.03203346, + 0.017995317, + -0.011522754, + -0.0029258654, + 
0.022844825, + -0.019639384, + 0.05111425, + -0.0015511515, + 0.04084381, + 0.0043716393, + -0.05789265, + 0.024110112, + 0.03920258, + -0.08151888, + -0.008190904, + -0.0645496, + -0.014420588, + 0.00016276255, + -0.10466175, + -0.015631696, + -0.054435816, + 0.03390489, + 0.042083304, + 0.041493565, + 0.033552594, + 0.027098974, + -0.035584476, + -0.025616122, + 0.015369336, + 0.025080213, + -0.047622968, + 0.0076927147, + 0.048611037, + 0.07658855, + 0.030115629, + -0.10192636, + 0.009031788, + -0.026905872, + -0.07093241, + 0.009540495, + -0.0967732, + 0.006907292, + 0.008907563, + -0.036709655, + -0.0074325944, + 0.06927971, + -0.044891518, + -0.0022573345, + -0.05632572, + 0.03744841, + 0.026788702, + -0.00916575, + 0.008179489, + 0.08744597, + -0.046512436, + -0.061149366, + -0.13555244, + 0.0010608839, + -0.06323009, + -0.039003603, + -0.07015582, + 0.03916791, + -0.07763432, + -0.00032964678, + -0.026286542, + -0.053487364, + 0.009920836, + 0.104119115, + -1.9471978e-33, + 0.04772588, + 0.04490678, + -0.04262699, + 0.03524018, + -0.003943472, + 0.033365145, + 0.06762878, + -0.021556355, + -0.043953415, + 0.023543492, + 0.005500359, + 0.03756542, + 0.025656395, + -0.014806406, + 0.01845547, + 0.015662882, + 0.06915146, + 0.010516805, + -0.08958506, + 0.008974718, + -0.035460126, + 0.05160542, + 0.01763933, + 0.067841165, + -0.02522728, + -0.022180483, + -0.085712284, + 0.061407775, + 0.07101853, + -0.0015686463, + 0.055281166, + 0.04126171, + -0.04599903, + -0.037977487, + 0.09936549, + -0.064348385, + 0.07501729, + 0.06690245, + 0.01264843, + 0.011582279, + 0.06661292, + 0.083571374, + -0.05528334, + 0.03757593, + 0.043382253, + 0.059041474, + 0.056976013, + -0.02765602, + -0.00018057597, + -0.010140114, + -0.023275468, + -0.040977187, + -0.0051338123, + 0.06462851, + -0.015096949, + -0.04108323, + 0.013806998, + -0.013243718, + -0.04096836, + -0.021470992, + 0.0037039437, + 0.04606251, + 0.027378108, + -0.009201031, + 0.024913032, + 0.027817363, + 0.011912681, + 0.072464235, + -0.04599433, + -0.033524342, + 0.031872187, + -0.0017134893, + -0.030329237, + 0.021338675, + 0.050125677, + -0.006607719, + 0.005844466, + -0.049508642, + 2.296406e-05, + 0.033044532, + 0.07586271, + 0.0094868485, + -0.0023229877, + 0.063257135, + 0.0073867897, + 0.067748606, + -0.088573374, + 0.06831021, + 0.0047544846, + 0.08063805, + -0.02170177, + 0.020645779, + 0.082571074, + 0.039116666, + 0.03906674, + -1.756136e-08, + -0.01928442, + -0.123927765, + 0.0188664, + 0.03889619, + 0.003943178, + 0.017261649, + -0.072421774, + 0.010595731, + -0.032426827, + -0.07068102, + 0.027171727, + -0.032465994, + -0.03428293, + 0.00012704723, + -0.07441139, + -0.061249517, + 0.003310212, + -0.030616615, + 0.037538156, + 0.013060206, + -0.02899822, + 0.002607385, + 0.023053044, + -0.008261543, + 0.027366797, + 0.041916996, + 0.07509514, + 0.093088634, + -0.05660954, + -0.10259794, + 0.041243467, + -0.025973666, + 0.013900956, + 0.0023358895, + -0.075266555, + 0.07490993, + 0.14500652, + 0.04697599, + -0.03860971, + 0.009254478, + -0.06991552, + 0.011762797, + 0.02150895, + 0.010407091, + -0.016874894, + -0.057741348, + -0.075219, + -0.07250321, + -0.03090426, + -0.110799745, + -0.024827298, + 0.0065941666, + -0.027638538, + 0.08827356, + -0.044589255, + -0.04193462, + 0.021976525, + 0.015851181, + -0.07105447, + 0.106275305, + 0.058465168, + 0.0026831257, + -0.006616897, + -0.086507544 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "all-minilm:l6-v2", + "object": "list", + "usage": { + 
"prompt_tokens": 10, + "total_tokens": 10 + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/vector_io/recordings/995712d2e4441339fdd8ca21d87747c9983b0d40cc83fcfd90c5e733ecfb5a35.json b/tests/integration/vector_io/recordings/995712d2e4441339fdd8ca21d87747c9983b0d40cc83fcfd90c5e733ecfb5a35.json new file mode 100644 index 000000000..eb070a328 --- /dev/null +++ b/tests/integration/vector_io/recordings/995712d2e4441339fdd8ca21d87747c9983b0d40cc83fcfd90c5e733ecfb5a35.json @@ -0,0 +1,39 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_cancel[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]", + "request": { + "method": "POST", + "url": "http://localhost:11434/api/ps", + "headers": {}, + "body": {}, + "endpoint": "/api/ps", + "model": "" + }, + "response": { + "body": { + "__type__": "ollama._types.ProcessResponse", + "__data__": { + "models": [ + { + "model": "all-minilm:l6-v2", + "name": "all-minilm:l6-v2", + "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef", + "expires_at": "2025-10-06T16:40:13.262640-07:00", + "size": 590204928, + "size_vram": 590204928, + "details": { + "parent_model": "", + "format": "gguf", + "family": "bert", + "families": [ + "bert" + ], + "parameter_size": "23M", + "quantization_level": "F16" + } + } + ] + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/vector_io/recordings/da1e7b0f80936e70deaa09b6678d0f2756377d5ed83818898fd4c4c67df8ade6.json b/tests/integration/vector_io/recordings/da1e7b0f80936e70deaa09b6678d0f2756377d5ed83818898fd4c4c67df8ade6.json new file mode 100644 index 000000000..938e83cf0 --- /dev/null +++ b/tests/integration/vector_io/recordings/da1e7b0f80936e70deaa09b6678d0f2756377d5ed83818898fd4c4c67df8ade6.json @@ -0,0 +1,423 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_cancel[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]", + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/embeddings", + "headers": {}, + "body": { + "model": "all-minilm:l6-v2", + "input": [ + "This is batch cancel test file 1 with substantial content" + ], + "encoding_format": "float" + }, + "endpoint": "/v1/embeddings", + "model": "all-minilm:l6-v2" + }, + "response": { + "body": { + "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse", + "__data__": { + "data": [ + { + "embedding": [ + -0.024848156, + 0.10927085, + -0.0545053, + 0.038470518, + 0.046556868, + -0.034411646, + 0.04878781, + -0.014318634, + -0.046015147, + 0.044597667, + 0.09629065, + 0.058968317, + -0.007982022, + -0.10140896, + -0.10389055, + -0.019553911, + -0.07593768, + 0.025729634, + -0.029175822, + -0.02637171, + -0.050457876, + 0.066799924, + 0.022711048, + 0.06541894, + 0.041600663, + 0.030976223, + -0.056684654, + -0.0035002322, + -0.050632603, + -0.08931927, + 0.040150054, + 0.06798157, + 0.08541512, + -0.0107848635, + 0.15392521, + 0.009335848, + 0.010962297, + 0.029146858, + 0.047823314, + -0.026440151, + 0.04159767, + -0.010160618, + -0.05779408, + -0.039702307, + -0.004494967, + -0.004617397, + -0.08862508, + -0.034483556, + 0.024042498, + 0.0051155766, + -0.0317056, + -0.01699217, + -0.053024635, + 0.015636722, + 0.03557156, + -0.039649993, + 0.081902996, + 0.06176357, + -0.05502012, + 0.06357122, + -0.030193875, + -0.012515638, + -0.12543206, + 0.012063709, + 0.12448795, + 
0.040869392, + -0.07753088, + -0.021475459, + 0.04500842, + -0.056871727, + -0.09496841, + -0.01180043, + -0.0017254521, + 0.08008634, + -0.047713377, + 0.08782804, + -0.02004271, + 0.033268984, + -0.016207146, + -0.010731495, + -0.063805684, + -0.14302677, + 0.0575187, + -0.06904251, + -0.037962824, + 0.0182337, + 0.042886198, + 0.01039097, + 0.044122625, + -0.0020459748, + -0.014757414, + 0.0011372506, + 0.07999029, + 0.018020395, + -0.018433796, + 0.07817236, + 0.012330995, + 0.007078602, + 0.03731257, + 0.03993665, + 0.039117657, + 0.0077354074, + -0.009170466, + -0.018691367, + -0.028763011, + -0.019665359, + 0.062140632, + -0.020356707, + 0.038877316, + -0.08305566, + 0.00014209712, + -0.05700167, + 0.021387467, + -0.054998472, + 0.03538585, + -0.023105556, + 0.089621656, + 0.09418147, + -0.08390289, + 0.009763535, + 0.043676704, + -0.0022283366, + 0.00070641236, + -0.03374215, + -0.07274797, + -0.034256138, + 0.09228734, + 1.2329422e-33, + -0.06229734, + -0.10348473, + -0.05939012, + 0.07817319, + 0.12856846, + 0.03253048, + 0.03706221, + 0.03843275, + -0.06781762, + 0.027851813, + -0.03286515, + -0.07305933, + 0.011496317, + 0.016992282, + -0.10859345, + -0.089275, + 0.02053902, + 0.07540007, + -0.030434899, + -0.057486024, + 0.1028371, + -0.011332772, + -0.040277272, + -0.022627348, + -0.029583039, + -0.042487655, + -0.01710431, + -0.028937005, + 0.034644134, + -0.015131404, + -0.005402634, + 0.0111823045, + -0.024323324, + 0.061144948, + -0.0068504023, + 0.04550556, + 0.017341396, + -0.063010655, + 0.033939265, + 0.029030005, + -0.07075115, + 0.0076140417, + -0.056033216, + -0.01839173, + 0.006444027, + -0.10148905, + -0.024238782, + -0.045753844, + 0.029873326, + 0.03732028, + 0.05342056, + 0.024428835, + 0.03200607, + -0.045322895, + -0.009412481, + 0.01895284, + 0.026068604, + -0.043451786, + 0.017836504, + 0.060751975, + 0.0770648, + 0.037520513, + -0.094844334, + 0.018022675, + -0.028010713, + -0.05970307, + 0.0042470302, + -0.08537647, + 0.0025366507, + 0.0059753954, + -0.040670317, + -0.008420785, + 0.070101276, + -0.05581281, + 0.009997155, + -0.053269707, + 0.030278698, + 0.034753144, + -0.0069992156, + -0.0018294669, + 0.052869115, + -0.047554925, + -0.07009094, + -0.12028551, + -0.016411684, + -0.0558196, + -0.026485136, + -0.07406597, + 0.052336086, + -0.07966716, + -0.009600498, + -0.016012779, + -0.04670444, + 0.0040856744, + 0.13087922, + -1.9130171e-33, + 0.04951988, + 0.04144521, + -0.030660233, + 0.02966906, + -0.0019053655, + 0.038034633, + 0.053598672, + -0.03873592, + -0.050682254, + 0.0163216, + -0.018117629, + 0.02705123, + 0.014957701, + -0.029251544, + 0.010732444, + 0.01150037, + 0.08527361, + 0.000666767, + -0.09031944, + 0.007236525, + -0.0394124, + 0.032647807, + 0.029387591, + 0.0696317, + -0.028400488, + -0.019728381, + -0.08580391, + 0.050916594, + 0.07555233, + 0.0013333871, + 0.036405865, + 0.03485496, + -0.035891958, + -0.03518406, + 0.08422707, + -0.07100648, + 0.066512264, + 0.0566844, + 0.005254722, + 0.026210023, + 0.06271422, + 0.07715752, + -0.042685844, + 0.029498853, + 0.048694577, + 0.06829996, + 0.05471948, + -0.014717811, + -0.0084376065, + -0.007800526, + -0.033968475, + -0.035792083, + -0.01680357, + 0.056615632, + -0.008940466, + -0.044396702, + 0.033141203, + -0.020710811, + -0.052891865, + -0.012946567, + 0.013425288, + 0.045469046, + 0.02655372, + -7.159544e-06, + 0.033383444, + 0.012771919, + 0.0050781234, + 0.05739414, + -0.05292731, + -0.009027621, + 0.019719183, + -0.0046205786, + -0.012921344, + 0.021115582, + 0.063510135, 
+ 0.006540324, + 0.008657973, + -0.044172782, + -0.0010352373, + 0.025917202, + 0.07357742, + 0.012915724, + -0.010159995, + 0.05862044, + 0.0032137444, + 0.08368076, + -0.06552963, + 0.06294139, + 0.004963379, + 0.08497223, + -0.030302247, + 0.028541481, + 0.103464715, + 0.03432187, + 0.039947473, + -1.757192e-08, + -0.020163277, + -0.12507844, + 0.015846072, + 0.038265407, + -0.0031526515, + 0.01804952, + -0.0817553, + 0.030486222, + -0.02073271, + -0.069118954, + 0.0252006, + -0.016496325, + -0.018695008, + -0.0063493066, + -0.08448383, + -0.05474651, + 0.008191211, + -0.04699509, + 0.03820692, + 0.019186925, + -0.006977571, + -0.0002934883, + 0.030278133, + -0.009153849, + 0.030300315, + 0.04737054, + 0.06026962, + 0.09765302, + -0.05529498, + -0.09553832, + 0.06008278, + -0.025960611, + 0.034287665, + -0.012333093, + -0.07106284, + 0.05141244, + 0.14179605, + 0.04709826, + -0.049292527, + 0.014455253, + -0.047851674, + 0.011403938, + 0.014072481, + 0.010494679, + -0.0009859774, + -0.06089218, + -0.07293921, + -0.07961594, + -0.03404924, + -0.10086713, + -0.031331882, + 0.0042822976, + -0.0045380252, + 0.09583955, + -0.044172354, + -0.034359995, + 0.023726532, + 0.02167657, + -0.06509328, + 0.09268318, + 0.055370033, + 0.003980954, + -0.0053826002, + -0.07774321 + ], + "index": 0, + "object": "embedding" + } + ], + "model": "all-minilm:l6-v2", + "object": "list", + "usage": { + "prompt_tokens": 10, + "total_tokens": 10 + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/vector_io/recordings/ffea5475c2625b87e302ec419cc536f34da3ce7e80eba86bec16d231aa347d00.json b/tests/integration/vector_io/recordings/ffea5475c2625b87e302ec419cc536f34da3ce7e80eba86bec16d231aa347d00.json new file mode 100644 index 000000000..6d9080f89 --- /dev/null +++ b/tests/integration/vector_io/recordings/ffea5475c2625b87e302ec419cc536f34da3ce7e80eba86bec16d231aa347d00.json @@ -0,0 +1,20 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_create_and_retrieve[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]", + "request": { + "method": "POST", + "url": "http://localhost:11434/api/ps", + "headers": {}, + "body": {}, + "endpoint": "/api/ps", + "model": "" + }, + "response": { + "body": { + "__type__": "ollama._types.ProcessResponse", + "__data__": { + "models": [] + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 0c60acd27..e850f2aee 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -902,3 +902,290 @@ def test_openai_vector_store_search_modes(llama_stack_client, client_with_models search_mode=search_mode, ) assert search_response is not None + + +def test_openai_vector_store_file_batch_create_and_retrieve(compat_client_with_empty_stores, client_with_models): + """Test creating and retrieving a vector store file batch.""" + skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) + + compat_client = compat_client_with_empty_stores + + # Create a vector store + vector_store = compat_client.vector_stores.create(name="batch_test_store") + + # Create multiple files + file_ids = [] + for i in range(2): + with BytesIO(f"This is batch test file {i}".encode()) as file_buffer: + file_buffer.name = f"batch_test_{i}.txt" + file = compat_client.files.create(file=file_buffer, 
purpose="assistants") + file_ids.append(file.id) + + # Create a file batch + batch = compat_client.vector_stores.file_batches.create( + vector_store_id=vector_store.id, + file_ids=file_ids, + ) + + assert batch is not None + assert batch.object == "vector_store.file_batch" + assert batch.vector_store_id == vector_store.id + assert batch.status in ["in_progress", "completed"] + assert batch.file_counts.total == len(file_ids) + assert hasattr(batch, "id") + assert hasattr(batch, "created_at") + + # Wait for batch processing to complete + max_retries = 60 # 60 seconds max wait (increased for file processing delays) + retries = 0 + retrieved_batch = None + while retries < max_retries: + retrieved_batch = compat_client.vector_stores.file_batches.retrieve( + vector_store_id=vector_store.id, + batch_id=batch.id, + ) + if retrieved_batch.status in ["completed", "failed"]: + break + time.sleep(1) + retries += 1 + + assert retrieved_batch is not None + assert retrieved_batch.id == batch.id + assert retrieved_batch.vector_store_id == vector_store.id + assert retrieved_batch.object == "vector_store.file_batch" + assert retrieved_batch.file_counts.total == len(file_ids) + assert retrieved_batch.status == "completed" # Should be completed after processing + + +def test_openai_vector_store_file_batch_list_files(compat_client_with_empty_stores, client_with_models): + """Test listing files in a vector store file batch.""" + skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) + + compat_client = compat_client_with_empty_stores + + # Create a vector store + vector_store = compat_client.vector_stores.create(name="batch_list_test_store") + + # Create multiple files + file_ids = [] + for i in range(2): + with BytesIO(f"This is batch list test file {i}".encode()) as file_buffer: + file_buffer.name = f"batch_list_test_{i}.txt" + file = compat_client.files.create(file=file_buffer, purpose="assistants") + file_ids.append(file.id) + + # Create a file batch + batch = compat_client.vector_stores.file_batches.create( + vector_store_id=vector_store.id, + file_ids=file_ids, + ) + + # Wait for batch processing to complete + max_retries = 60 # 60 seconds max wait (increased for file processing delays) + retries = 0 + while retries < max_retries: + retrieved_batch = compat_client.vector_stores.file_batches.retrieve( + vector_store_id=vector_store.id, + batch_id=batch.id, + ) + if retrieved_batch.status in ["completed", "failed"]: + break + time.sleep(1) + retries += 1 + + # List all files in the batch + files_response = compat_client.vector_stores.file_batches.list_files( + vector_store_id=vector_store.id, + batch_id=batch.id, + ) + + assert files_response is not None + assert files_response.object == "list" + assert hasattr(files_response, "data") + assert len(files_response.data) == len(file_ids) + + # Verify all files are in the response + response_file_ids = {file.id for file in files_response.data} + assert response_file_ids == set(file_ids) + + # Test pagination with limit + limited_response = compat_client.vector_stores.file_batches.list_files( + vector_store_id=vector_store.id, + batch_id=batch.id, + limit=3, + ) + + assert len(limited_response.data) == 2 + assert limited_response.has_more is False + + # Test pagination with after cursor + first_page = compat_client.vector_stores.file_batches.list_files( + vector_store_id=vector_store.id, + batch_id=batch.id, + limit=2, + ) + + second_page = compat_client.vector_stores.file_batches.list_files( + vector_store_id=vector_store.id, + 
batch_id=batch.id,
+        limit=2,
+        after=first_page.data[-1].id,
+    )
+
+    assert len(first_page.data) == 2
+    assert len(second_page.data) == 0  # both files fit on the first page, so no files remain
+    # Ensure no overlap between pages
+    first_page_ids = {file.id for file in first_page.data}
+    second_page_ids = {file.id for file in second_page.data}
+    assert first_page_ids.isdisjoint(second_page_ids)
+
+
+def test_openai_vector_store_file_batch_cancel(compat_client_with_empty_stores, client_with_models):
+    """Test cancelling a vector store file batch."""
+    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
+
+    compat_client = compat_client_with_empty_stores
+
+    # Create a vector store
+    vector_store = compat_client.vector_stores.create(name="batch_cancel_test_store")
+
+    # Create a batch to test cancellation
+    file_ids = []
+    for i in range(2):  # Batch size that allows time for cancellation
+        with BytesIO(f"This is batch cancel test file {i} with substantial content".encode()) as file_buffer:
+            file_buffer.name = f"batch_cancel_test_{i}.txt"
+            file = compat_client.files.create(file=file_buffer, purpose="assistants")
+            file_ids.append(file.id)
+
+    # Create a file batch
+    batch = compat_client.vector_stores.file_batches.create(
+        vector_store_id=vector_store.id,
+        file_ids=file_ids,
+    )
+
+    try:
+        # Cancel the batch immediately after creation
+        cancelled_batch = compat_client.vector_stores.file_batches.cancel(
+            vector_store_id=vector_store.id,
+            batch_id=batch.id,
+        )
+
+        assert cancelled_batch is not None
+        assert cancelled_batch.id == batch.id
+        assert cancelled_batch.vector_store_id == vector_store.id
+        assert cancelled_batch.status == "cancelled"
+        assert cancelled_batch.object == "vector_store.file_batch"
+    except Exception:
+        # If cancellation fails (e.g., batch completed too quickly),
+        # verify the batch reached completion instead
+        final_batch = compat_client.vector_stores.file_batches.retrieve(
+            vector_store_id=vector_store.id,
+            batch_id=batch.id,
+        )
+        assert final_batch.status in ["completed", "cancelled"]
+
+
+def test_openai_vector_store_file_batch_retrieve_contents(compat_client_with_empty_stores, client_with_models):
+    """Test retrieving file contents after file batch processing."""
+    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
+
+    compat_client = compat_client_with_empty_stores
+
+    # Create a vector store
+    vector_store = compat_client.vector_stores.create(name="batch_contents_test_store")
+
+    # Create multiple files with known content
+    file_data = [
+        ("test_file_1.txt", b"This is the content of test file 1"),
+        ("test_file_2.txt", b"This is the content of test file 2"),
+    ]
+
+    file_ids = []
+    for filename, content in file_data:
+        with BytesIO(content) as file_buffer:
+            file_buffer.name = filename
+            file = compat_client.files.create(file=file_buffer, purpose="assistants")
+            file_ids.append(file.id)
+
+    # Create a file batch
+    batch = compat_client.vector_stores.file_batches.create(
+        vector_store_id=vector_store.id,
+        file_ids=file_ids,
+    )
+
+    # Wait for batch processing to complete
+    max_retries = 60  # 60 seconds max wait (increased for file processing delays)
+    retries = 0
+    while retries < max_retries:
+        retrieved_batch = compat_client.vector_stores.file_batches.retrieve(
+            vector_store_id=vector_store.id,
+            batch_id=batch.id,
+        )
+        if retrieved_batch.status in ["completed", "failed"]:
+            break
+        time.sleep(1)
+        retries += 1
+
+    assert retrieved_batch.status == "completed"
+
+    # Retrieve file contents for each file in the batch
+    for i, file_id 
in enumerate(file_ids): + file_contents = compat_client.vector_stores.files.content( + vector_store_id=vector_store.id, + file_id=file_id, + ) + + assert file_contents is not None + assert file_contents.filename == file_data[i][0] + assert len(file_contents.content) > 0 + + # Verify the content matches what we uploaded + content_text = ( + file_contents.content[0].text + if hasattr(file_contents.content[0], "text") + else file_contents.content[0]["text"] + ) + assert file_data[i][1].decode("utf-8") in content_text + + +def test_openai_vector_store_file_batch_error_handling(compat_client_with_empty_stores, client_with_models): + """Test error handling for file batch operations.""" + skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) + + compat_client = compat_client_with_empty_stores + + # Create a vector store + vector_store = compat_client.vector_stores.create(name="batch_error_test_store") + + # Test with invalid file IDs (should handle gracefully) + file_ids = ["invalid_file_id_1", "invalid_file_id_2"] + + batch = compat_client.vector_stores.file_batches.create( + vector_store_id=vector_store.id, + file_ids=file_ids, + ) + + assert batch is not None + assert batch.file_counts.total == len(file_ids) + # Invalid files should be marked as failed + assert batch.file_counts.failed >= 0 # Implementation may vary + + # Determine expected errors based on client type + if isinstance(compat_client, LlamaStackAsLibraryClient): + errors = ValueError + else: + errors = (BadRequestError, OpenAIBadRequestError) + + # Test retrieving non-existent batch + with pytest.raises(errors): # Should raise an error for non-existent batch + compat_client.vector_stores.file_batches.retrieve( + vector_store_id=vector_store.id, + batch_id="non_existent_batch_id", + ) + + # Test operations on non-existent vector store + with pytest.raises(errors): # Should raise an error for non-existent vector store + compat_client.vector_stores.file_batches.create( + vector_store_id="non_existent_vector_store", + file_ids=["any_file_id"], + ) diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 98889f38e..c8b77ea67 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -6,16 +6,22 @@ import json import time -from unittest.mock import AsyncMock +from unittest.mock import AsyncMock, patch import numpy as np import pytest +from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse +from llama_stack.apis.vector_io import ( + Chunk, + QueryChunksResponse, + VectorStoreChunkingStrategyAuto, + VectorStoreFileObject, +) from llama_stack.providers.remote.vector_io.milvus.milvus import VECTOR_DBS_PREFIX -# This test is a unit test for the inline VectoerIO providers. This should only contain +# This test is a unit test for the inline VectorIO providers. This should only contain # tests which are specific to this class. 
More general (API-level) tests should be placed in # tests/integration/vector_io/ # @@ -25,6 +31,16 @@ from llama_stack.providers.remote.vector_io.milvus.milvus import VECTOR_DBS_PREF # -v -s --tb=short --disable-warnings --asyncio-mode=auto +@pytest.fixture(autouse=True) +def mock_resume_file_batches(request): + """Mock the resume functionality to prevent stale file batches from being processed during tests.""" + with patch( + "llama_stack.providers.utils.memory.openai_vector_store_mixin.OpenAIVectorStoreMixin._resume_incomplete_batches", + new_callable=AsyncMock, + ): + yield + + async def test_initialize_index(vector_index): await vector_index.initialize() @@ -294,3 +310,668 @@ async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, t assert loaded_file_info == {} loaded_contents = await vector_io_adapter._load_openai_vector_store_file_contents(store_id, file_id) assert loaded_contents == [] + + +async def test_create_vector_store_file_batch(vector_io_adapter): + """Test creating a file batch.""" + store_id = "vs_1234" + file_ids = ["file_1", "file_2", "file_3"] + + # Setup vector store + vector_io_adapter.openai_vector_stores[store_id] = { + "id": store_id, + "name": "Test Store", + "files": {}, + "file_ids": [], + } + + # Mock attach method and batch processing to avoid actual processing + vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() + vector_io_adapter._process_file_batch_async = AsyncMock() + + batch = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, + file_ids=file_ids, + ) + + assert batch.vector_store_id == store_id + assert batch.status == "in_progress" + assert batch.file_counts.total == len(file_ids) + assert batch.file_counts.in_progress == len(file_ids) + assert batch.id in vector_io_adapter.openai_file_batches + + +async def test_retrieve_vector_store_file_batch(vector_io_adapter): + """Test retrieving a file batch.""" + store_id = "vs_1234" + file_ids = ["file_1", "file_2"] + + # Setup vector store + vector_io_adapter.openai_vector_stores[store_id] = { + "id": store_id, + "name": "Test Store", + "files": {}, + "file_ids": [], + } + + vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() + + # Create batch first + created_batch = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, + file_ids=file_ids, + ) + + # Retrieve batch + retrieved_batch = await vector_io_adapter.openai_retrieve_vector_store_file_batch( + batch_id=created_batch.id, + vector_store_id=store_id, + ) + + assert retrieved_batch.id == created_batch.id + assert retrieved_batch.vector_store_id == store_id + assert retrieved_batch.status == "in_progress" + + +async def test_cancel_vector_store_file_batch(vector_io_adapter): + """Test cancelling a file batch.""" + store_id = "vs_1234" + file_ids = ["file_1"] + + # Setup vector store + vector_io_adapter.openai_vector_stores[store_id] = { + "id": store_id, + "name": "Test Store", + "files": {}, + "file_ids": [], + } + + # Mock both file attachment and batch processing to prevent automatic completion + vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() + vector_io_adapter._process_file_batch_async = AsyncMock() + + # Create batch + batch = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, + file_ids=file_ids, + ) + + # Cancel batch + cancelled_batch = await vector_io_adapter.openai_cancel_vector_store_file_batch( + batch_id=batch.id, + vector_store_id=store_id, + ) + + 
assert cancelled_batch.status == "cancelled" + + +async def test_list_files_in_vector_store_file_batch(vector_io_adapter): + """Test listing files in a batch.""" + store_id = "vs_1234" + file_ids = ["file_1", "file_2"] + + # Setup vector store with files + files = {} + for i, file_id in enumerate(file_ids): + files[file_id] = VectorStoreFileObject( + id=file_id, + object="vector_store.file", + usage_bytes=1000, + created_at=int(time.time()) + i, + vector_store_id=store_id, + status="completed", + chunking_strategy=VectorStoreChunkingStrategyAuto(), + ) + + vector_io_adapter.openai_vector_stores[store_id] = { + "id": store_id, + "name": "Test Store", + "files": files, + "file_ids": file_ids, + } + + # Mock file loading + vector_io_adapter._load_openai_vector_store_file = AsyncMock( + side_effect=lambda vs_id, f_id: files[f_id].model_dump() + ) + vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() + + # Create batch + batch = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, + file_ids=file_ids, + ) + + # List files + response = await vector_io_adapter.openai_list_files_in_vector_store_file_batch( + batch_id=batch.id, + vector_store_id=store_id, + ) + + assert len(response.data) == len(file_ids) + assert response.first_id is not None + assert response.last_id is not None + + +async def test_file_batch_validation_errors(vector_io_adapter): + """Test file batch validation errors.""" + # Test nonexistent vector store + with pytest.raises(VectorStoreNotFoundError): + await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id="nonexistent", + file_ids=["file_1"], + ) + + # Setup store for remaining tests + store_id = "vs_test" + vector_io_adapter.openai_vector_stores[store_id] = {"id": store_id, "files": {}, "file_ids": []} + + # Test nonexistent batch + with pytest.raises(ValueError, match="File batch .* not found"): + await vector_io_adapter.openai_retrieve_vector_store_file_batch( + batch_id="nonexistent_batch", + vector_store_id=store_id, + ) + + # Test wrong vector store for batch + vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() + batch = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, + file_ids=["file_1"], + ) + + # Create wrong_store so it exists but the batch doesn't belong to it + wrong_store_id = "wrong_store" + vector_io_adapter.openai_vector_stores[wrong_store_id] = {"id": wrong_store_id, "files": {}, "file_ids": []} + + with pytest.raises(ValueError, match="does not belong to vector store"): + await vector_io_adapter.openai_retrieve_vector_store_file_batch( + batch_id=batch.id, + vector_store_id=wrong_store_id, + ) + + +async def test_file_batch_pagination(vector_io_adapter): + """Test file batch pagination.""" + store_id = "vs_1234" + file_ids = ["file_1", "file_2", "file_3", "file_4", "file_5"] + + # Setup vector store with multiple files + files = {} + for i, file_id in enumerate(file_ids): + files[file_id] = VectorStoreFileObject( + id=file_id, + object="vector_store.file", + usage_bytes=1000, + created_at=int(time.time()) + i, + vector_store_id=store_id, + status="completed", + chunking_strategy=VectorStoreChunkingStrategyAuto(), + ) + + vector_io_adapter.openai_vector_stores[store_id] = { + "id": store_id, + "name": "Test Store", + "files": files, + "file_ids": file_ids, + } + + # Mock file loading + vector_io_adapter._load_openai_vector_store_file = AsyncMock( + side_effect=lambda vs_id, f_id: files[f_id].model_dump() + ) + 
vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() + + # Create batch + batch = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, + file_ids=file_ids, + ) + + # Test pagination with limit + response = await vector_io_adapter.openai_list_files_in_vector_store_file_batch( + batch_id=batch.id, + vector_store_id=store_id, + limit=3, + ) + + assert len(response.data) == 3 + assert response.has_more is True + + # Test pagination with after cursor + first_page = await vector_io_adapter.openai_list_files_in_vector_store_file_batch( + batch_id=batch.id, + vector_store_id=store_id, + limit=2, + ) + + second_page = await vector_io_adapter.openai_list_files_in_vector_store_file_batch( + batch_id=batch.id, + vector_store_id=store_id, + limit=2, + after=first_page.last_id, + ) + + assert len(first_page.data) == 2 + assert len(second_page.data) == 2 + # Ensure no overlap between pages + first_page_ids = {file_obj.id for file_obj in first_page.data} + second_page_ids = {file_obj.id for file_obj in second_page.data} + assert first_page_ids.isdisjoint(second_page_ids) + # Verify we got all expected files across both pages (in desc order: file_5, file_4, file_3, file_2, file_1) + all_returned_ids = first_page_ids | second_page_ids + assert all_returned_ids == {"file_2", "file_3", "file_4", "file_5"} + + +async def test_file_batch_status_filtering(vector_io_adapter): + """Test file batch status filtering.""" + store_id = "vs_1234" + file_ids = ["file_1", "file_2", "file_3"] + + # Setup vector store with files having different statuses + files = {} + statuses = ["completed", "in_progress", "completed"] + for i, (file_id, status) in enumerate(zip(file_ids, statuses, strict=False)): + files[file_id] = VectorStoreFileObject( + id=file_id, + object="vector_store.file", + usage_bytes=1000, + created_at=int(time.time()) + i, + vector_store_id=store_id, + status=status, + chunking_strategy=VectorStoreChunkingStrategyAuto(), + ) + + vector_io_adapter.openai_vector_stores[store_id] = { + "id": store_id, + "name": "Test Store", + "files": files, + "file_ids": file_ids, + } + + # Mock file loading + vector_io_adapter._load_openai_vector_store_file = AsyncMock( + side_effect=lambda vs_id, f_id: files[f_id].model_dump() + ) + vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() + + # Create batch + batch = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, + file_ids=file_ids, + ) + + # Test filtering by completed status + response = await vector_io_adapter.openai_list_files_in_vector_store_file_batch( + batch_id=batch.id, + vector_store_id=store_id, + filter="completed", + ) + + assert len(response.data) == 2 # Only 2 completed files + for file_obj in response.data: + assert file_obj.status == "completed" + + # Test filtering by in_progress status + response = await vector_io_adapter.openai_list_files_in_vector_store_file_batch( + batch_id=batch.id, + vector_store_id=store_id, + filter="in_progress", + ) + + assert len(response.data) == 1 # Only 1 in_progress file + assert response.data[0].status == "in_progress" + + +async def test_cancel_completed_batch_fails(vector_io_adapter): + """Test that cancelling completed batch fails.""" + store_id = "vs_1234" + file_ids = ["file_1"] + + # Setup vector store + vector_io_adapter.openai_vector_stores[store_id] = { + "id": store_id, + "name": "Test Store", + "files": {}, + "file_ids": [], + } + + vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() + + # 
Create batch + batch = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, + file_ids=file_ids, + ) + + # Manually update status to completed + batch_info = vector_io_adapter.openai_file_batches[batch.id] + batch_info["status"] = "completed" + + # Try to cancel - should fail + with pytest.raises(ValueError, match="Cannot cancel batch .* with status completed"): + await vector_io_adapter.openai_cancel_vector_store_file_batch( + batch_id=batch.id, + vector_store_id=store_id, + ) + + +async def test_file_batch_persistence_across_restarts(vector_io_adapter): + """Test that in-progress file batches are persisted and resumed after restart.""" + store_id = "vs_1234" + file_ids = ["file_1", "file_2"] + + # Setup vector store + vector_io_adapter.openai_vector_stores[store_id] = { + "id": store_id, + "name": "Test Store", + "files": {}, + "file_ids": [], + } + + # Mock attach method and batch processing to avoid actual processing + vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() + vector_io_adapter._process_file_batch_async = AsyncMock() + + # Create batch + batch = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, + file_ids=file_ids, + ) + batch_id = batch.id + + # Verify batch is saved to persistent storage + assert batch_id in vector_io_adapter.openai_file_batches + saved_batch_key = f"openai_vector_stores_file_batches:v3::{batch_id}" + saved_batch = await vector_io_adapter.kvstore.get(saved_batch_key) + assert saved_batch is not None + + # Verify the saved batch data contains all necessary information + saved_data = json.loads(saved_batch) + assert saved_data["id"] == batch_id + assert saved_data["status"] == "in_progress" + assert saved_data["file_ids"] == file_ids + + # Simulate restart - clear in-memory cache and reload from persistence + vector_io_adapter.openai_file_batches.clear() + + # Temporarily restore the real initialize_openai_vector_stores method + from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin + + real_method = OpenAIVectorStoreMixin.initialize_openai_vector_stores + await real_method(vector_io_adapter) + + # Re-mock the processing method to prevent any resumed batches from processing + vector_io_adapter._process_file_batch_async = AsyncMock() + + # Verify batch was restored + assert batch_id in vector_io_adapter.openai_file_batches + restored_batch = vector_io_adapter.openai_file_batches[batch_id] + assert restored_batch["status"] == "in_progress" + assert restored_batch["id"] == batch_id + assert vector_io_adapter.openai_file_batches[batch_id]["file_ids"] == file_ids + + +async def test_cancelled_batch_persists_in_storage(vector_io_adapter): + """Test that cancelled batches persist in storage with updated status.""" + store_id = "vs_1234" + file_ids = ["file_1", "file_2"] + + # Setup vector store + vector_io_adapter.openai_vector_stores[store_id] = { + "id": store_id, + "name": "Test Store", + "files": {}, + "file_ids": [], + } + + # Mock attach method and batch processing to avoid actual processing + vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() + vector_io_adapter._process_file_batch_async = AsyncMock() + + # Create batch + batch = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, + file_ids=file_ids, + ) + batch_id = batch.id + + # Verify batch is initially saved to persistent storage + saved_batch_key = f"openai_vector_stores_file_batches:v3::{batch_id}" + saved_batch 
= await vector_io_adapter.kvstore.get(saved_batch_key) + assert saved_batch is not None + + # Cancel the batch + cancelled_batch = await vector_io_adapter.openai_cancel_vector_store_file_batch( + batch_id=batch_id, + vector_store_id=store_id, + ) + + # Verify batch status is cancelled + assert cancelled_batch.status == "cancelled" + + # Verify batch persists in storage with cancelled status + updated_batch = await vector_io_adapter.kvstore.get(saved_batch_key) + assert updated_batch is not None + batch_data = json.loads(updated_batch) + assert batch_data["status"] == "cancelled" + + # Batch should remain in memory cache (matches vector store pattern) + assert batch_id in vector_io_adapter.openai_file_batches + assert vector_io_adapter.openai_file_batches[batch_id]["status"] == "cancelled" + + +async def test_only_in_progress_batches_resumed(vector_io_adapter): + """Test that only in-progress batches are resumed for processing, but all batches are persisted.""" + store_id = "vs_1234" + + # Setup vector store + vector_io_adapter.openai_vector_stores[store_id] = { + "id": store_id, + "name": "Test Store", + "files": {}, + "file_ids": [], + } + + # Mock attach method and batch processing to prevent automatic completion + vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() + vector_io_adapter._process_file_batch_async = AsyncMock() + + # Create multiple batches + batch1 = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, file_ids=["file_1"] + ) + batch2 = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, file_ids=["file_2"] + ) + + # Complete one batch (should persist with completed status) + batch1_info = vector_io_adapter.openai_file_batches[batch1.id] + batch1_info["status"] = "completed" + await vector_io_adapter._save_openai_vector_store_file_batch(batch1.id, batch1_info) + + # Cancel the other batch (should persist with cancelled status) + await vector_io_adapter.openai_cancel_vector_store_file_batch(batch_id=batch2.id, vector_store_id=store_id) + + # Create a third batch that stays in progress + batch3 = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, file_ids=["file_3"] + ) + + # Simulate restart - clear memory and reload from persistence + vector_io_adapter.openai_file_batches.clear() + + # Temporarily restore the real initialize_openai_vector_stores method + from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin + + real_method = OpenAIVectorStoreMixin.initialize_openai_vector_stores + await real_method(vector_io_adapter) + + # All batches should be restored from persistence + assert batch1.id in vector_io_adapter.openai_file_batches # completed, persisted + assert batch2.id in vector_io_adapter.openai_file_batches # cancelled, persisted + assert batch3.id in vector_io_adapter.openai_file_batches # in-progress, restored + + # Check their statuses + assert vector_io_adapter.openai_file_batches[batch1.id]["status"] == "completed" + assert vector_io_adapter.openai_file_batches[batch2.id]["status"] == "cancelled" + assert vector_io_adapter.openai_file_batches[batch3.id]["status"] == "in_progress" + + # Resume functionality is mocked, so we're only testing persistence + + +async def test_cleanup_expired_file_batches(vector_io_adapter): + """Test that expired file batches are cleaned up properly.""" + store_id = "vs_1234" + + # Setup vector store + vector_io_adapter.openai_vector_stores[store_id] = { + "id": 
store_id, + "name": "Test Store", + "files": {}, + "file_ids": [], + } + + # Mock processing to prevent automatic completion + vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock() + vector_io_adapter._process_file_batch_async = AsyncMock() + + # Create batches with different ages + import time + + current_time = int(time.time()) + + # Create an old expired batch (10 days old) + old_batch_info = { + "id": "batch_old", + "vector_store_id": store_id, + "status": "completed", + "created_at": current_time - (10 * 24 * 60 * 60), # 10 days ago + "expires_at": current_time - (3 * 24 * 60 * 60), # Expired 3 days ago + "file_ids": ["file_1"], + } + + # Create a recent valid batch + new_batch_info = { + "id": "batch_new", + "vector_store_id": store_id, + "status": "completed", + "created_at": current_time - (1 * 24 * 60 * 60), # 1 day ago + "expires_at": current_time + (6 * 24 * 60 * 60), # Expires in 6 days + "file_ids": ["file_2"], + } + + # Store both batches in persistent storage + await vector_io_adapter._save_openai_vector_store_file_batch("batch_old", old_batch_info) + await vector_io_adapter._save_openai_vector_store_file_batch("batch_new", new_batch_info) + + # Add to in-memory cache + vector_io_adapter.openai_file_batches["batch_old"] = old_batch_info + vector_io_adapter.openai_file_batches["batch_new"] = new_batch_info + + # Verify both batches exist before cleanup + assert "batch_old" in vector_io_adapter.openai_file_batches + assert "batch_new" in vector_io_adapter.openai_file_batches + + # Run cleanup + await vector_io_adapter._cleanup_expired_file_batches() + + # Verify expired batch was removed from memory + assert "batch_old" not in vector_io_adapter.openai_file_batches + assert "batch_new" in vector_io_adapter.openai_file_batches + + # Verify expired batch was removed from storage + old_batch_key = "openai_vector_stores_file_batches:v3::batch_old" + new_batch_key = "openai_vector_stores_file_batches:v3::batch_new" + + old_stored = await vector_io_adapter.kvstore.get(old_batch_key) + new_stored = await vector_io_adapter.kvstore.get(new_batch_key) + + assert old_stored is None # Expired batch should be deleted + assert new_stored is not None # Valid batch should remain + + +async def test_expired_batch_access_error(vector_io_adapter): + """Test that accessing expired batches returns clear error message.""" + store_id = "vs_1234" + + # Setup vector store + vector_io_adapter.openai_vector_stores[store_id] = { + "id": store_id, + "name": "Test Store", + "files": {}, + "file_ids": [], + } + + # Create an expired batch + import time + + current_time = int(time.time()) + + expired_batch_info = { + "id": "batch_expired", + "vector_store_id": store_id, + "status": "completed", + "created_at": current_time - (10 * 24 * 60 * 60), # 10 days ago + "expires_at": current_time - (3 * 24 * 60 * 60), # Expired 3 days ago + "file_ids": ["file_1"], + } + + # Add to in-memory cache (simulating it was loaded before expiration) + vector_io_adapter.openai_file_batches["batch_expired"] = expired_batch_info + + # Try to access expired batch + with pytest.raises(ValueError, match="File batch batch_expired has expired after 7 days from creation"): + vector_io_adapter._get_and_validate_batch("batch_expired", store_id) + + +async def test_max_concurrent_files_per_batch(vector_io_adapter): + """Test that file batch processing respects MAX_CONCURRENT_FILES_PER_BATCH limit.""" + import asyncio + + store_id = "vs_1234" + + # Setup vector store + vector_io_adapter.openai_vector_stores[store_id] = { + 
"id": store_id, + "name": "Test Store", + "files": {}, + "file_ids": [], + } + + active_files = 0 + + async def mock_attach_file_with_delay(vector_store_id: str, file_id: str, **kwargs): + """Mock that tracks concurrency and blocks indefinitely to test concurrency limit.""" + nonlocal active_files + active_files += 1 + + # Block indefinitely to test concurrency limit + await asyncio.sleep(float("inf")) + + # Replace the attachment method + vector_io_adapter.openai_attach_file_to_vector_store = mock_attach_file_with_delay + + # Create a batch with more files than the concurrency limit + file_ids = [f"file_{i}" for i in range(8)] # 8 files, but limit should be 5 + + batch = await vector_io_adapter.openai_create_vector_store_file_batch( + vector_store_id=store_id, + file_ids=file_ids, + ) + + # Give time for the semaphore logic to start processing files + await asyncio.sleep(0.2) + + # Verify that only MAX_CONCURRENT_FILES_PER_BATCH files are processing concurrently + # The semaphore in _process_files_with_concurrency should limit this + from llama_stack.providers.utils.memory.openai_vector_store_mixin import MAX_CONCURRENT_FILES_PER_BATCH + + assert active_files == MAX_CONCURRENT_FILES_PER_BATCH, ( + f"Expected {MAX_CONCURRENT_FILES_PER_BATCH} active files, got {active_files}" + ) + + # Verify batch is in progress + assert batch.status == "in_progress" + assert batch.file_counts.total == 8 + assert batch.file_counts.in_progress == 8 From 50f9ca3541c8d6fb7c5593e996d471a415758c39 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Tue, 7 Oct 2025 03:13:11 -0700 Subject: [PATCH 08/14] chore: remove dead code (#3713) # What does this PR do? ## Test Plan --- llama_stack/cli/stack/run.py | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index cec101083..19930a27b 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -75,39 +75,6 @@ class StackRun(Subcommand): help="Start the UI server", ) - def _resolve_config_and_distro(self, args: argparse.Namespace) -> tuple[Path | None, str | None]: - """Resolve config file path and distribution name from args.config""" - from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR - - if not args.config: - return None, None - - config_file = Path(args.config) - has_yaml_suffix = args.config.endswith(".yaml") - distro_name = None - - if not config_file.exists() and not has_yaml_suffix: - # check if this is a distribution - config_file = Path(REPO_ROOT) / "llama_stack" / "distributions" / args.config / "run.yaml" - if config_file.exists(): - distro_name = args.config - - if not config_file.exists() and not has_yaml_suffix: - # check if it's a build config saved to ~/.llama dir - config_file = Path(DISTRIBS_BASE_DIR / f"llamastack-{args.config}" / f"{args.config}-run.yaml") - - if not config_file.exists(): - self.parser.error( - f"File {str(config_file)} does not exist.\n\nPlease run `llama stack build` to generate (and optionally edit) a run.yaml file" - ) - - if not config_file.is_file(): - self.parser.error( - f"Config file must be a valid file path, '{config_file}' is not a file: type={type(config_file)}" - ) - - return config_file, distro_name - def _run_stack_run_cmd(self, args: argparse.Namespace) -> None: import yaml From 509ac4a659ae95eeae305154f7be643512127e2f Mon Sep 17 00:00:00 2001 From: Justin Date: Tue, 7 Oct 2025 03:24:50 -0700 Subject: [PATCH 09/14] feat: enable Runpod inference adapter (#3707) # What does this PR do? 
Sorry to @mattf: I thought I could close the other PR and reopen it, but I no longer had the option to reopen it, and I didn't want it to keep notifying maintainers while I made further test commits.

Continuation of: https://github.com/llamastack/llama-stack/pull/3641

This PR fixes the Runpod adapter: https://github.com/llamastack/llama-stack/issues/3517

## What I fixed from before:

1. Made it all OpenAI-compatible
2. Updated the class for the recent OpenAIMixin changes (it is now a pydantic base model)
3. Tested that we can dynamically discover models and use the resulting identifier to make requests:

```bash
curl -X GET \
  -H "Content-Type: application/json" \
  "http://localhost:8321/v1/models"
```

## Test Plan

# RunPod Provider Quick Start

## Prerequisites
- Python 3.10+
- Git
- RunPod API token

## Setup for Development

```bash
# 1. Clone and enter the repository
cd (into the repo)

# 2. Create and activate virtual environment
python3 -m venv .venv
source .venv/bin/activate

# 3. Remove any existing llama-stack installation
pip uninstall llama-stack llama-stack-client -y

# 4. Install llama-stack in development mode
pip install -e .

# 5. Build using local development code (found this through the Discord)
LLAMA_STACK_DIR=. llama stack build
# When prompted during build:
# - Name: runpod-dev
# - Image type: venv
# - Inference provider: remote::runpod
# - Safety provider: "llama-guard"
# - Other providers: first defaults
```

## Configure the Stack

The RunPod adapter automatically discovers models from your endpoint via the `/v1/models` API. No manual model configuration is required - just set your environment variables.

## Run the Server

### Important: Use the Build-Created Virtual Environment

```bash
# Exit the development venv if you're in it
deactivate

# Activate the build-created venv (NOT .venv)
cd (llama-stack repo folder)
source llamastack-runpod-dev/bin/activate
```

### For Qwen3-32B-AWQ Public Endpoint (Recommended)

```bash
# Set environment variables
export RUNPOD_URL="https://api.runpod.ai/v2/qwen3-32b-awq/openai/v1"
export RUNPOD_API_TOKEN="your_runpod_api_key"

# Start server
llama stack run ~/.llama/distributions/llamastack-runpod-dev/llamastack-runpod-dev-run.yaml
```

## Quick Test

### 1. List Available Models (Dynamic Discovery)

First, check which models are available on your RunPod endpoint:

```bash
curl -X GET \
  -H "Content-Type: application/json" \
  "http://localhost:8321/v1/models"
```

**Example Response:**

```json
{
  "data": [
    {
      "identifier": "qwen3-32b-awq",
      "provider_resource_id": "Qwen/Qwen3-32B-AWQ",
      "provider_id": "runpod",
      "type": "model",
      "metadata": {},
      "model_type": "llm"
    }
  ]
}
```

**Note:** Use the `identifier` value from the response above in your requests below.

### 2. Chat Completion (Non-streaming)

Replace `qwen3-32b-awq` with your model identifier from step 1:

```bash
curl -X POST http://localhost:8321/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "qwen3-32b-awq",
    "messages": [{"role": "user", "content": "Hello, count to 3"}],
    "stream": false
  }'
```
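The same non-streaming request can also be issued from Python. This is a minimal sketch, assuming the stack built above is serving on `localhost:8321`, the `openai` package is installed, and `qwen3-32b-awq` is the identifier discovered in step 1:

```python
# Sketch: call the Llama Stack OpenAI-compatible endpoint with the OpenAI client.
from openai import OpenAI

# The api_key value is a placeholder; a local stack ignores it unless auth is configured.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.chat.completions.create(
    model="qwen3-32b-awq",  # identifier discovered via GET /v1/models
    messages=[{"role": "user", "content": "Hello, count to 3"}],
    stream=False,
)
print(response.choices[0].message.content)
```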
### 3. Chat Completion (Streaming)

```bash
curl -X POST http://localhost:8321/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "qwen3-32b-awq",
    "messages": [{"role": "user", "content": "Count to 5"}],
    "stream": true
  }'
```

**Clean streaming output:**

```bash
curl -N -X POST http://localhost:8321/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen3-32b-awq", "messages": [{"role": "user", "content": "Count to 5"}], "stream": true}' \
  2>/dev/null | while read -r line; do
    echo "$line" | grep "^data: " | sed 's/^data: //' | jq -r '.choices[0].delta.content // empty' 2>/dev/null
  done
```

**Expected Output:**
```
1
2
3
4
5
```
--- .../remote/inference/runpod/__init__.py | 2 +- .../remote/inference/runpod/runpod.py | 123 ++++++++++-------- 2 files changed, 71 insertions(+), 54 deletions(-) diff --git a/llama_stack/providers/remote/inference/runpod/__init__.py b/llama_stack/providers/remote/inference/runpod/__init__.py index 69bf95046..d1fd2b718 100644 --- a/llama_stack/providers/remote/inference/runpod/__init__.py +++ b/llama_stack/providers/remote/inference/runpod/__init__.py @@ -11,6 +11,6 @@ async def get_adapter_impl(config: RunpodImplConfig, _deps): from .runpod import RunpodInferenceAdapter assert isinstance(config, RunpodImplConfig), f"Unexpected config type: {type(config)}" - impl = RunpodInferenceAdapter(config) + impl = RunpodInferenceAdapter(config=config) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py index 08652f8c0..f752740e5 100644 --- a/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/llama_stack/providers/remote/inference/runpod/runpod.py @@ -4,69 +4,86 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+from typing import Any -from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.apis.inference import OpenAIEmbeddingsResponse - -# from llama_stack.providers.datatypes import ModelsProtocolPrivate -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, build_hf_repo_model_entry -from llama_stack.providers.utils.inference.openai_compat import ( - get_sampling_options, -) -from llama_stack.providers.utils.inference.prompt_adapter import ( - chat_completion_request_to_prompt, +from llama_stack.apis.inference import ( + OpenAIMessageParam, + OpenAIResponseFormatParam, ) +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import RunpodImplConfig -# https://docs.runpod.io/serverless/vllm/overview#compatible-models -# https://github.com/runpod-workers/worker-vllm/blob/main/README.md#compatible-model-architectures -RUNPOD_SUPPORTED_MODELS = { - "Llama3.1-8B": "meta-llama/Llama-3.1-8B", - "Llama3.1-70B": "meta-llama/Llama-3.1-70B", - "Llama3.1-405B:bf16-mp8": "meta-llama/Llama-3.1-405B", - "Llama3.1-405B": "meta-llama/Llama-3.1-405B-FP8", - "Llama3.1-405B:bf16-mp16": "meta-llama/Llama-3.1-405B", - "Llama3.1-8B-Instruct": "meta-llama/Llama-3.1-8B-Instruct", - "Llama3.1-70B-Instruct": "meta-llama/Llama-3.1-70B-Instruct", - "Llama3.1-405B-Instruct:bf16-mp8": "meta-llama/Llama-3.1-405B-Instruct", - "Llama3.1-405B-Instruct": "meta-llama/Llama-3.1-405B-Instruct-FP8", - "Llama3.1-405B-Instruct:bf16-mp16": "meta-llama/Llama-3.1-405B-Instruct", - "Llama3.2-1B": "meta-llama/Llama-3.2-1B", - "Llama3.2-3B": "meta-llama/Llama-3.2-3B", -} -SAFETY_MODELS_ENTRIES = [] +class RunpodInferenceAdapter(OpenAIMixin): + """ + Adapter for RunPod's OpenAI-compatible API endpoints. + Supports VLLM for serverless endpoint self-hosted or public endpoints. 
+ Can work with any runpod endpoints that support OpenAI-compatible API + """ -# Create MODEL_ENTRIES from RUNPOD_SUPPORTED_MODELS for compatibility with starter template -MODEL_ENTRIES = [ - build_hf_repo_model_entry(provider_model_id, model_descriptor) - for provider_model_id, model_descriptor in RUNPOD_SUPPORTED_MODELS.items() -] + SAFETY_MODELS_ENTRIES + config: RunpodImplConfig + def get_api_key(self) -> str: + """Get API key for OpenAI client.""" + return self.config.api_token -class RunpodInferenceAdapter( - ModelRegistryHelper, - Inference, -): - def __init__(self, config: RunpodImplConfig) -> None: - ModelRegistryHelper.__init__(self, stack_to_provider_models_map=RUNPOD_SUPPORTED_MODELS) - self.config = config + def get_base_url(self) -> str: + """Get base URL for OpenAI client.""" + return self.config.url - def _get_params(self, request: ChatCompletionRequest) -> dict: - return { - "model": self.map_to_provider_model(request.model), - "prompt": chat_completion_request_to_prompt(request), - "stream": request.stream, - **get_sampling_options(request.sampling_params), - } - - async def openai_embeddings( + async def openai_chat_completion( self, model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, + messages: list[OpenAIMessageParam], + frequency_penalty: float | None = None, + function_call: str | dict[str, Any] | None = None, + functions: list[dict[str, Any]] | None = None, + logit_bias: dict[str, float] | None = None, + logprobs: bool | None = None, + max_completion_tokens: int | None = None, + max_tokens: int | None = None, + n: int | None = None, + parallel_tool_calls: bool | None = None, + presence_penalty: float | None = None, + response_format: OpenAIResponseFormatParam | None = None, + seed: int | None = None, + stop: str | list[str] | None = None, + stream: bool | None = None, + stream_options: dict[str, Any] | None = None, + temperature: float | None = None, + tool_choice: str | dict[str, Any] | None = None, + tools: list[dict[str, Any]] | None = None, + top_logprobs: int | None = None, + top_p: float | None = None, user: str | None = None, - ) -> OpenAIEmbeddingsResponse: - raise NotImplementedError() + ): + """Override to add RunPod-specific stream_options requirement.""" + if stream and not stream_options: + stream_options = {"include_usage": True} + + return await super().openai_chat_completion( + model=model, + messages=messages, + frequency_penalty=frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + logprobs=logprobs, + max_completion_tokens=max_completion_tokens, + max_tokens=max_tokens, + n=n, + parallel_tool_calls=parallel_tool_calls, + presence_penalty=presence_penalty, + response_format=response_format, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + tool_choice=tool_choice, + tools=tools, + top_logprobs=top_logprobs, + top_p=top_p, + user=user, + ) From 1fcde5fc2fd0871a0a77607d639535948bdd7da7 Mon Sep 17 00:00:00 2001 From: Sumanth Kamenani Date: Tue, 7 Oct 2025 09:01:36 -0400 Subject: [PATCH 10/14] fix: update pyproject.toml dependencies for vector processing (#3555) What does this PR do? Updates pyproject.toml dependencies to fix vector processing compatibility issues. closes: #3495 Test Plan Tested llama stack server with faiss vector database: 1. Built and ran server: llama stack build --distro starter --image-type venv --image-name llamastack-faiss 3. 
Tested file upload: Successfully uploaded PDF via /v1/openai/v1/files 4. Tested vector operations: - Created vector store with faiss backend - Added PDF to vector store - Performed semantic search queries --- .../providers/registry/tool_runtime.py | 6 ++--- llama_stack/providers/registry/vector_io.py | 27 ++++++++++--------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/llama_stack/providers/registry/tool_runtime.py b/llama_stack/providers/registry/tool_runtime.py index ad8c31dfd..39dc7fccd 100644 --- a/llama_stack/providers/registry/tool_runtime.py +++ b/llama_stack/providers/registry/tool_runtime.py @@ -11,6 +11,7 @@ from llama_stack.providers.datatypes import ( ProviderSpec, RemoteProviderSpec, ) +from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS def available_providers() -> list[ProviderSpec]: @@ -18,9 +19,8 @@ def available_providers() -> list[ProviderSpec]: InlineProviderSpec( api=Api.tool_runtime, provider_type="inline::rag-runtime", - pip_packages=[ - "chardet", - "pypdf", + pip_packages=DEFAULT_VECTOR_IO_DEPS + + [ "tqdm", "numpy", "scikit-learn", diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index ebab7aaf9..da2a68535 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -12,13 +12,16 @@ from llama_stack.providers.datatypes import ( RemoteProviderSpec, ) +# Common dependencies for all vector IO providers that support document processing +DEFAULT_VECTOR_IO_DEPS = ["chardet", "pypdf"] + def available_providers() -> list[ProviderSpec]: return [ InlineProviderSpec( api=Api.vector_io, provider_type="inline::meta-reference", - pip_packages=["faiss-cpu"], + pip_packages=["faiss-cpu"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.inline.vector_io.faiss", config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig", deprecation_warning="Please use the `inline::faiss` provider instead.", @@ -29,7 +32,7 @@ def available_providers() -> list[ProviderSpec]: InlineProviderSpec( api=Api.vector_io, provider_type="inline::faiss", - pip_packages=["faiss-cpu"], + pip_packages=["faiss-cpu"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.inline.vector_io.faiss", config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig", api_dependencies=[Api.inference], @@ -82,7 +85,7 @@ more details about Faiss in general. InlineProviderSpec( api=Api.vector_io, provider_type="inline::sqlite-vec", - pip_packages=["sqlite-vec"], + pip_packages=["sqlite-vec"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.inline.vector_io.sqlite_vec", config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig", api_dependencies=[Api.inference], @@ -289,7 +292,7 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f InlineProviderSpec( api=Api.vector_io, provider_type="inline::sqlite_vec", - pip_packages=["sqlite-vec"], + pip_packages=["sqlite-vec"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.inline.vector_io.sqlite_vec", config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig", deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.", @@ -303,7 +306,7 @@ Please refer to the sqlite-vec provider documentation. 
api=Api.vector_io, adapter_type="chromadb", provider_type="remote::chromadb", - pip_packages=["chromadb-client"], + pip_packages=["chromadb-client"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.remote.vector_io.chroma", config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig", api_dependencies=[Api.inference], @@ -345,7 +348,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti InlineProviderSpec( api=Api.vector_io, provider_type="inline::chromadb", - pip_packages=["chromadb"], + pip_packages=["chromadb"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.inline.vector_io.chroma", config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig", api_dependencies=[Api.inference], @@ -389,7 +392,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti api=Api.vector_io, adapter_type="pgvector", provider_type="remote::pgvector", - pip_packages=["psycopg2-binary"], + pip_packages=["psycopg2-binary"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.remote.vector_io.pgvector", config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig", api_dependencies=[Api.inference], @@ -500,7 +503,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de api=Api.vector_io, adapter_type="weaviate", provider_type="remote::weaviate", - pip_packages=["weaviate-client>=4.16.5"], + pip_packages=["weaviate-client>=4.16.5"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.remote.vector_io.weaviate", config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig", provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData", @@ -541,7 +544,7 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more InlineProviderSpec( api=Api.vector_io, provider_type="inline::qdrant", - pip_packages=["qdrant-client"], + pip_packages=["qdrant-client"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.inline.vector_io.qdrant", config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig", api_dependencies=[Api.inference], @@ -594,7 +597,7 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta api=Api.vector_io, adapter_type="qdrant", provider_type="remote::qdrant", - pip_packages=["qdrant-client"], + pip_packages=["qdrant-client"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.remote.vector_io.qdrant", config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig", api_dependencies=[Api.inference], @@ -607,7 +610,7 @@ Please refer to the inline provider documentation. 
api=Api.vector_io, adapter_type="milvus", provider_type="remote::milvus", - pip_packages=["pymilvus>=2.4.10"], + pip_packages=["pymilvus>=2.4.10"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.remote.vector_io.milvus", config_class="llama_stack.providers.remote.vector_io.milvus.MilvusVectorIOConfig", api_dependencies=[Api.inference], @@ -813,7 +816,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi InlineProviderSpec( api=Api.vector_io, provider_type="inline::milvus", - pip_packages=["pymilvus[milvus-lite]>=2.4.10"], + pip_packages=["pymilvus[milvus-lite]>=2.4.10"] + DEFAULT_VECTOR_IO_DEPS, module="llama_stack.providers.inline.vector_io.milvus", config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig", api_dependencies=[Api.inference], From 8b9af03a1bd631768f8090afdbe9523e2fe0ae6c Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Tue, 7 Oct 2025 09:04:07 -0400 Subject: [PATCH 11/14] fix: refresh log should be debug (#3720) # What does this PR do? when using a distro like starter where a bunch of providers are disabled I should not see logs like: ``` in the provider data header, e.g. x-llamastack-provider-data: {"groq_api_key": ""}, or in the provider config. WARNING 2025-10-07 08:38:52,117 llama_stack.core.routing_tables.models:36 core::routing_tables: Model refresh failed for provider sambanova: API key is not set. Please provide a valid API key in the provider data header, e.g. x-llamastack-provider-data: {"sambanova_api_key": ""}, or in the provider config. WARNING 2025-10-07 08:43:52,123 llama_stack.core.routing_tables.models:36 core::routing_tables: Model refresh failed for provider fireworks: Pass Fireworks API Key in the header X-LlamaStack-Provider-Data as { "fireworks_api_key": } WARNING 2025-10-07 08:43:52,126 llama_stack.core.routing_tables.models:36 core::routing_tables: Model refresh failed for provider together: Pass Together API Key in the header X-LlamaStack-Provider-Data as { "together_api_key": } WARNING 2025-10-07 08:43:52,129 llama_stack.core.routing_tables.models:36 core::routing_tables: Model refresh failed for provider openai: API key is not set. Please provide a valid API key in the provider data header, e.g. x-llamastack-provider-data: {"openai_api_key": ""}, or in the provider config. WARNING 2025-10-07 08:43:52,132 llama_stack.core.routing_tables.models:36 core::routing_tables: Model refresh failed for provider anthropic: API key is not set. Please provide a valid API key in the provider data header, e.g. x-llamastack-provider-data: {"anthropic_api_key": ""}, or in the provider config. WARNING 2025-10-07 08:43:52,136 llama_stack.core.routing_tables.models:36 core::routing_tables: Model refresh failed for provider gemini: API key is not set. Please provide a valid API key in the provider data header, e.g. x-llamastack-provider-data: {"gemini_api_key": ""}, or in the provider config. WARNING 2025-10-07 08:43:52,139 llama_stack.core.routing_tables.models:36 core::routing_tables: Model refresh failed for provider groq: API key is not set. Please provide a valid API key in the provider data header, e.g. x-llamastack-provider-data: {"groq_api_key": ""}, or in the provider config. WARNING 2025-10-07 08:43:52,142 llama_stack.core.routing_tables.models:36 core::routing_tables: Model refresh failed for provider sambanova: API key is not set. Please provide a valid API key in the provider data header, e.g. x-llamastack-provider-data: {"sambanova_api_key": ""}, or in the provider config. 
^CINFO 2025-10-07 08:46:11,996 llama_stack.core.utils.exec:75 core: ``` as WARNING. Switch to Debug. Signed-off-by: Charlie Doern --- llama_stack/core/routing_tables/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/core/routing_tables/models.py b/llama_stack/core/routing_tables/models.py index 641c73c16..69d7e9b6f 100644 --- a/llama_stack/core/routing_tables/models.py +++ b/llama_stack/core/routing_tables/models.py @@ -33,7 +33,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models): try: models = await provider.list_models() except Exception as e: - logger.warning(f"Model refresh failed for provider {provider_id}: {e}") + logger.debug(f"Model refresh failed for provider {provider_id}: {e}") continue self.listed_providers.add(provider_id) From e892a3f7f4cafdc1fd0ae1b94e4f8edd11bd0119 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Tue, 7 Oct 2025 09:19:56 -0400 Subject: [PATCH 12/14] feat: add refresh_models support to inference adapters (default: false) (#3719) # What does this PR do? inference adapters can now configure `refresh_models: bool` to control periodic model listing from their providers BREAKING CHANGE: together inference adapter default changed. previously always refreshed, now follows config. addresses "models: refresh" on #3517 ## Test Plan ci w/ new tests --- .../providers/inference/remote_anthropic.mdx | 1 + .../docs/providers/inference/remote_azure.mdx | 1 + .../providers/inference/remote_bedrock.mdx | 1 + .../providers/inference/remote_cerebras.mdx | 1 + .../providers/inference/remote_databricks.mdx | 1 + .../providers/inference/remote_fireworks.mdx | 1 + .../providers/inference/remote_gemini.mdx | 1 + docs/docs/providers/inference/remote_groq.mdx | 1 + .../inference/remote_llama-openai-compat.mdx | 1 + .../providers/inference/remote_nvidia.mdx | 1 + .../providers/inference/remote_ollama.mdx | 2 +- .../providers/inference/remote_openai.mdx | 1 + .../inference/remote_passthrough.mdx | 1 + .../providers/inference/remote_runpod.mdx | 1 + .../providers/inference/remote_sambanova.mdx | 1 + docs/docs/providers/inference/remote_tgi.mdx | 1 + .../providers/inference/remote_together.mdx | 1 + .../providers/inference/remote_vertexai.mdx | 1 + docs/docs/providers/inference/remote_vllm.mdx | 2 +- .../providers/inference/remote_watsonx.mdx | 1 + docs/docs/providers/safety/remote_bedrock.mdx | 1 + .../remote/inference/databricks/databricks.py | 3 -- .../remote/inference/ollama/config.py | 6 --- .../remote/inference/ollama/ollama.py | 3 -- .../remote/inference/together/together.py | 3 -- .../providers/remote/inference/vllm/config.py | 4 -- .../providers/remote/inference/vllm/vllm.py | 4 -- .../utils/inference/model_registry.py | 4 ++ .../providers/utils/inference/openai_mixin.py | 2 +- .../providers/inference/test_remote_vllm.py | 40 ------------------- .../utils/inference/test_openai_mixin.py | 8 +++- 31 files changed, 33 insertions(+), 67 deletions(-) diff --git a/docs/docs/providers/inference/remote_anthropic.mdx b/docs/docs/providers/inference/remote_anthropic.mdx index 96162d25c..44c1fcbb1 100644 --- a/docs/docs/providers/inference/remote_anthropic.mdx +++ b/docs/docs/providers/inference/remote_anthropic.mdx @@ -15,6 +15,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. 
If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `str \| None` | No | | API key for Anthropic models | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_azure.mdx b/docs/docs/providers/inference/remote_azure.mdx index 721fe429c..56a14c100 100644 --- a/docs/docs/providers/inference/remote_azure.mdx +++ b/docs/docs/providers/inference/remote_azure.mdx @@ -22,6 +22,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `` | No | | Azure API key for Azure | | `api_base` | `` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) | | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) | diff --git a/docs/docs/providers/inference/remote_bedrock.mdx b/docs/docs/providers/inference/remote_bedrock.mdx index 2a5d1b74d..683ec12f8 100644 --- a/docs/docs/providers/inference/remote_bedrock.mdx +++ b/docs/docs/providers/inference/remote_bedrock.mdx @@ -15,6 +15,7 @@ AWS Bedrock inference provider for accessing various AI models through AWS's man | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | | `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | | `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | diff --git a/docs/docs/providers/inference/remote_cerebras.mdx b/docs/docs/providers/inference/remote_cerebras.mdx index 1a543389d..d364b9884 100644 --- a/docs/docs/providers/inference/remote_cerebras.mdx +++ b/docs/docs/providers/inference/remote_cerebras.mdx @@ -15,6 +15,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `base_url` | `` | No | https://api.cerebras.ai | Base URL for the Cerebras API | | `api_key` | `` | No | | Cerebras API Key | diff --git a/docs/docs/providers/inference/remote_databricks.mdx b/docs/docs/providers/inference/remote_databricks.mdx index 670f8a7f9..d7b0bd38d 100644 --- a/docs/docs/providers/inference/remote_databricks.mdx +++ b/docs/docs/providers/inference/remote_databricks.mdx @@ -15,6 +15,7 @@ Databricks inference provider for running models on Databricks' unified analytic | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint | | `api_token` | `` | No | | The Databricks API token | diff --git a/docs/docs/providers/inference/remote_fireworks.mdx b/docs/docs/providers/inference/remote_fireworks.mdx index d2c3a664e..cfdfb993c 100644 --- a/docs/docs/providers/inference/remote_fireworks.mdx +++ b/docs/docs/providers/inference/remote_fireworks.mdx @@ -15,6 +15,7 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | | `api_key` | `pydantic.types.SecretStr \| None` | No | | The Fireworks.ai API Key | diff --git a/docs/docs/providers/inference/remote_gemini.mdx b/docs/docs/providers/inference/remote_gemini.mdx index 5222eaa89..a13d1c82d 100644 --- a/docs/docs/providers/inference/remote_gemini.mdx +++ b/docs/docs/providers/inference/remote_gemini.mdx @@ -15,6 +15,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `str \| None` | No | | API key for Gemini models | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_groq.mdx b/docs/docs/providers/inference/remote_groq.mdx index 77516ed1f..1edb4f9ea 100644 --- a/docs/docs/providers/inference/remote_groq.mdx +++ b/docs/docs/providers/inference/remote_groq.mdx @@ -15,6 +15,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `str \| None` | No | | The Groq API key | | `url` | `` | No | https://api.groq.com | The URL for the Groq AI server | diff --git a/docs/docs/providers/inference/remote_llama-openai-compat.mdx b/docs/docs/providers/inference/remote_llama-openai-compat.mdx index bcd50f772..ca5830b09 100644 --- a/docs/docs/providers/inference/remote_llama-openai-compat.mdx +++ b/docs/docs/providers/inference/remote_llama-openai-compat.mdx @@ -15,6 +15,7 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `str \| None` | No | | The Llama API key | | `openai_compat_api_base` | `` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | diff --git a/docs/docs/providers/inference/remote_nvidia.mdx b/docs/docs/providers/inference/remote_nvidia.mdx index 348a42e59..6b5e36180 100644 --- a/docs/docs/providers/inference/remote_nvidia.mdx +++ b/docs/docs/providers/inference/remote_nvidia.mdx @@ -15,6 +15,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | | `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed of using the hosted service | | `timeout` | `` | No | 60 | Timeout for the HTTP requests | diff --git a/docs/docs/providers/inference/remote_ollama.mdx b/docs/docs/providers/inference/remote_ollama.mdx index f075607d8..e00e34e4a 100644 --- a/docs/docs/providers/inference/remote_ollama.mdx +++ b/docs/docs/providers/inference/remote_ollama.mdx @@ -15,8 +15,8 @@ Ollama inference provider for running local models through the Ollama runtime. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `` | No | http://localhost:11434 | | -| `refresh_models` | `` | No | False | Whether to refresh models periodically | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_openai.mdx b/docs/docs/providers/inference/remote_openai.mdx index b795d02b1..e0910c809 100644 --- a/docs/docs/providers/inference/remote_openai.mdx +++ b/docs/docs/providers/inference/remote_openai.mdx @@ -15,6 +15,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. 
If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `str \| None` | No | | API key for OpenAI models | | `base_url` | `` | No | https://api.openai.com/v1 | Base URL for OpenAI API | diff --git a/docs/docs/providers/inference/remote_passthrough.mdx b/docs/docs/providers/inference/remote_passthrough.mdx index 58d5619b8..e356384ad 100644 --- a/docs/docs/providers/inference/remote_passthrough.mdx +++ b/docs/docs/providers/inference/remote_passthrough.mdx @@ -15,6 +15,7 @@ Passthrough inference provider for connecting to any external inference service | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `` | No | | The URL for the passthrough endpoint | | `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint | diff --git a/docs/docs/providers/inference/remote_runpod.mdx b/docs/docs/providers/inference/remote_runpod.mdx index 92cc66eb1..876532029 100644 --- a/docs/docs/providers/inference/remote_runpod.mdx +++ b/docs/docs/providers/inference/remote_runpod.mdx @@ -15,6 +15,7 @@ RunPod inference provider for running models on RunPod's cloud GPU platform. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint | | `api_token` | `str \| None` | No | | The API token | diff --git a/docs/docs/providers/inference/remote_sambanova.mdx b/docs/docs/providers/inference/remote_sambanova.mdx index b28471890..9bd7b7613 100644 --- a/docs/docs/providers/inference/remote_sambanova.mdx +++ b/docs/docs/providers/inference/remote_sambanova.mdx @@ -15,6 +15,7 @@ SambaNova inference provider for running models on SambaNova's dataflow architec | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | | `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key | diff --git a/docs/docs/providers/inference/remote_tgi.mdx b/docs/docs/providers/inference/remote_tgi.mdx index 6ff82cc2b..67fe6d237 100644 --- a/docs/docs/providers/inference/remote_tgi.mdx +++ b/docs/docs/providers/inference/remote_tgi.mdx @@ -15,6 +15,7 @@ Text Generation Inference (TGI) provider for HuggingFace model serving. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `` | No | | The URL for the TGI serving endpoint | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_together.mdx b/docs/docs/providers/inference/remote_together.mdx index da232a45b..6df2ca866 100644 --- a/docs/docs/providers/inference/remote_together.mdx +++ b/docs/docs/providers/inference/remote_together.mdx @@ -15,6 +15,7 @@ Together AI inference provider for open-source models and collaborative AI devel | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `` | No | https://api.together.xyz/v1 | The URL for the Together AI server | | `api_key` | `pydantic.types.SecretStr \| None` | No | | The Together AI API Key | diff --git a/docs/docs/providers/inference/remote_vertexai.mdx b/docs/docs/providers/inference/remote_vertexai.mdx index 48da6be24..c182ed485 100644 --- a/docs/docs/providers/inference/remote_vertexai.mdx +++ b/docs/docs/providers/inference/remote_vertexai.mdx @@ -54,6 +54,7 @@ Available Models: | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `project` | `` | No | | Google Cloud project ID for Vertex AI | | `location` | `` | No | us-central1 | Google Cloud location for Vertex AI | diff --git a/docs/docs/providers/inference/remote_vllm.mdx b/docs/docs/providers/inference/remote_vllm.mdx index 598f97b19..fbbd424a3 100644 --- a/docs/docs/providers/inference/remote_vllm.mdx +++ b/docs/docs/providers/inference/remote_vllm.mdx @@ -15,11 +15,11 @@ Remote vLLM inference provider for connecting to vLLM servers. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint | | `max_tokens` | `` | No | 4096 | Maximum number of tokens to generate. | | `api_token` | `str \| None` | No | fake | The API token | | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. 
| -| `refresh_models` | `` | No | False | Whether to refresh models periodically | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_watsonx.mdx b/docs/docs/providers/inference/remote_watsonx.mdx index 8cd3b2869..33bc5bbc3 100644 --- a/docs/docs/providers/inference/remote_watsonx.mdx +++ b/docs/docs/providers/inference/remote_watsonx.mdx @@ -15,6 +15,7 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | | `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key | | `project_id` | `str \| None` | No | | The Project ID key | diff --git a/docs/docs/providers/safety/remote_bedrock.mdx b/docs/docs/providers/safety/remote_bedrock.mdx index 530a208b5..663a761f0 100644 --- a/docs/docs/providers/safety/remote_bedrock.mdx +++ b/docs/docs/providers/safety/remote_bedrock.mdx @@ -15,6 +15,7 @@ AWS Bedrock safety provider for content moderation using AWS's safety services. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | | `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | | `aws_session_token` | `str \| None` | No | | The AWS session token to use. 
Default use environment variable: AWS_SESSION_TOKEN | diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py index f4ad1be94..200b36171 100644 --- a/llama_stack/providers/remote/inference/databricks/databricks.py +++ b/llama_stack/providers/remote/inference/databricks/databricks.py @@ -41,9 +41,6 @@ class DatabricksInferenceAdapter(OpenAIMixin): ).serving_endpoints.list() # TODO: this is not async ] - async def should_refresh_models(self) -> bool: - return False - async def openai_completion( self, model: str, diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py index d2f104e1e..1e4ce9113 100644 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ b/llama_stack/providers/remote/inference/ollama/config.py @@ -6,8 +6,6 @@ from typing import Any -from pydantic import Field - from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig DEFAULT_OLLAMA_URL = "http://localhost:11434" @@ -15,10 +13,6 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434" class OllamaImplConfig(RemoteInferenceProviderConfig): url: str = DEFAULT_OLLAMA_URL - refresh_models: bool = Field( - default=False, - description="Whether to refresh models periodically", - ) @classmethod def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]: diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index e5b08997c..67d0caa54 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -72,9 +72,6 @@ class OllamaInferenceAdapter(OpenAIMixin): f"Ollama Server is not running (message: {r['message']}). Make sure to start it using `ollama serve` in a separate terminal" ) - async def should_refresh_models(self) -> bool: - return self.config.refresh_models - async def health(self) -> HealthResponse: """ Performs a health check by verifying connectivity to the Ollama server. diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index fbefe630f..224de6721 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -63,9 +63,6 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData): # Together's /v1/models is not compatible with OpenAI's /v1/models. Together support ticket #13355 -> will not fix, use Together's own client return [m.id for m in await self._get_client().models.list()] - async def should_refresh_models(self) -> bool: - return True - async def openai_embeddings( self, model: str, diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py index 86ef3fe26..87c5408d3 100644 --- a/llama_stack/providers/remote/inference/vllm/config.py +++ b/llama_stack/providers/remote/inference/vllm/config.py @@ -30,10 +30,6 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig): default=True, description="Whether to verify TLS certificates. 
Can be a boolean or a path to a CA certificate file.", ) - refresh_models: bool = Field( - default=False, - description="Whether to refresh models periodically", - ) @field_validator("tls_verify") @classmethod diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 4e7884cd2..310eaf7b6 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -53,10 +53,6 @@ class VLLMInferenceAdapter(OpenAIMixin): "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM." ) - async def should_refresh_models(self) -> bool: - # Strictly respecting the refresh_models directive - return self.config.refresh_models - async def health(self) -> HealthResponse: """ Performs a health check by verifying connectivity to the remote vLLM server. diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index 4913c2e1f..9d42d68c6 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -24,6 +24,10 @@ class RemoteInferenceProviderConfig(BaseModel): default=None, description="List of models that should be registered with the model registry. If None, all models are allowed.", ) + refresh_models: bool = Field( + default=False, + description="Whether to refresh models periodically from the provider", + ) # TODO: this class is more confusing than useful right now. We need to make it diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/llama_stack/providers/utils/inference/openai_mixin.py index 9137013ee..3c5c5b4de 100644 --- a/llama_stack/providers/utils/inference/openai_mixin.py +++ b/llama_stack/providers/utils/inference/openai_mixin.py @@ -484,7 +484,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel): return model in self._model_cache async def should_refresh_models(self) -> bool: - return False + return self.config.refresh_models # # The model_dump implementations are to avoid serializing the extra fields, diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index 2806f618c..6d6bb20d5 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -186,43 +186,3 @@ async def test_openai_chat_completion_is_async(vllm_inference_adapter): assert mock_create_client.call_count == 4 # no cheating assert total_time < (sleep_time * 2), f"Total time taken: {total_time}s exceeded expected max" - - -async def test_should_refresh_models(): - """ - Test the should_refresh_models method with different refresh_models configurations. - - This test verifies that: - 1. When refresh_models is True, should_refresh_models returns True regardless of api_token - 2. 
When refresh_models is False, should_refresh_models returns False regardless of api_token - """ - - # Test case 1: refresh_models is True, api_token is None - config1 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token=None, refresh_models=True) - adapter1 = VLLMInferenceAdapter(config=config1) - result1 = await adapter1.should_refresh_models() - assert result1 is True, "should_refresh_models should return True when refresh_models is True" - - # Test case 2: refresh_models is True, api_token is empty string - config2 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token="", refresh_models=True) - adapter2 = VLLMInferenceAdapter(config=config2) - result2 = await adapter2.should_refresh_models() - assert result2 is True, "should_refresh_models should return True when refresh_models is True" - - # Test case 3: refresh_models is True, api_token is "fake" (default) - config3 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token="fake", refresh_models=True) - adapter3 = VLLMInferenceAdapter(config=config3) - result3 = await adapter3.should_refresh_models() - assert result3 is True, "should_refresh_models should return True when refresh_models is True" - - # Test case 4: refresh_models is True, api_token is real token - config4 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token="real-token-123", refresh_models=True) - adapter4 = VLLMInferenceAdapter(config=config4) - result4 = await adapter4.should_refresh_models() - assert result4 is True, "should_refresh_models should return True when refresh_models is True" - - # Test case 5: refresh_models is False, api_token is real token - config5 = VLLMInferenceAdapterConfig(url="http://test.localhost", api_token="real-token-456", refresh_models=False) - adapter5 = VLLMInferenceAdapter(config=config5) - result5 = await adapter5.should_refresh_models() - assert result5 is False, "should_refresh_models should return False when refresh_models is False" diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py index ac4c29fea..2e3a62ca6 100644 --- a/tests/unit/providers/utils/inference/test_openai_mixin.py +++ b/tests/unit/providers/utils/inference/test_openai_mixin.py @@ -466,10 +466,16 @@ class TestOpenAIMixinModelRegistration: assert result is None async def test_should_refresh_models(self, mixin): - """Test should_refresh_models method (should always return False)""" + """Test should_refresh_models method returns config value""" + # Default config has refresh_models=False result = await mixin.should_refresh_models() assert result is False + config_with_refresh = RemoteInferenceProviderConfig(refresh_models=True) + mixin_with_refresh = OpenAIMixinImpl(config=config_with_refresh) + result_with_refresh = await mixin_with_refresh.should_refresh_models() + assert result_with_refresh is True + async def test_register_model_error_propagation(self, mixin, mock_client_with_exception, mock_client_context): """Test that errors from provider API are properly propagated during registration""" model = Model( From 6389bf5ffb5ecc1d30a022cb74499f01d16418c1 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Tue, 7 Oct 2025 10:09:03 -0400 Subject: [PATCH 13/14] fix: make telemetry optional for agents (#3705) # What does this PR do? there is a lot of code in the agents API using the telemetry API and its helpers without checking if that API is even enabled. 
This is the only API besides inference actively using telemetry code, so after this telemetry can be optional for the entire stack resolves #3665 ## Test Plan existing agent tests. Signed-off-by: Charlie Doern --- .../inline/agents/meta_reference/__init__.py | 1 + .../agents/meta_reference/agent_instance.py | 86 +++++++++++-------- .../inline/agents/meta_reference/agents.py | 3 + llama_stack/providers/registry/agents.py | 3 + 4 files changed, 57 insertions(+), 36 deletions(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/llama_stack/providers/inline/agents/meta_reference/__init__.py index 334c32e15..37b0b50c8 100644 --- a/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -22,6 +22,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap deps[Api.tool_runtime], deps[Api.tool_groups], policy, + Api.telemetry in deps, ) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 207f0daec..c2ce9aa7b 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -110,6 +110,7 @@ class ChatAgent(ShieldRunnerMixin): persistence_store: KVStore, created_at: str, policy: list[AccessRule], + telemetry_enabled: bool = False, ): self.agent_id = agent_id self.agent_config = agent_config @@ -120,6 +121,7 @@ class ChatAgent(ShieldRunnerMixin): self.tool_runtime_api = tool_runtime_api self.tool_groups_api = tool_groups_api self.created_at = created_at + self.telemetry_enabled = telemetry_enabled ShieldRunnerMixin.__init__( self, @@ -188,28 +190,30 @@ class ChatAgent(ShieldRunnerMixin): async def create_and_execute_turn(self, request: AgentTurnCreateRequest) -> AsyncGenerator: turn_id = str(uuid.uuid4()) - span = tracing.get_current_span() - if span: - span.set_attribute("session_id", request.session_id) - span.set_attribute("agent_id", self.agent_id) - span.set_attribute("request", request.model_dump_json()) - span.set_attribute("turn_id", turn_id) - if self.agent_config.name: - span.set_attribute("agent_name", self.agent_config.name) + if self.telemetry_enabled: + span = tracing.get_current_span() + if span is not None: + span.set_attribute("session_id", request.session_id) + span.set_attribute("agent_id", self.agent_id) + span.set_attribute("request", request.model_dump_json()) + span.set_attribute("turn_id", turn_id) + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) await self._initialize_tools(request.toolgroups) async for chunk in self._run_turn(request, turn_id): yield chunk async def resume_turn(self, request: AgentTurnResumeRequest) -> AsyncGenerator: - span = tracing.get_current_span() - if span: - span.set_attribute("agent_id", self.agent_id) - span.set_attribute("session_id", request.session_id) - span.set_attribute("request", request.model_dump_json()) - span.set_attribute("turn_id", request.turn_id) - if self.agent_config.name: - span.set_attribute("agent_name", self.agent_config.name) + if self.telemetry_enabled: + span = tracing.get_current_span() + if span is not None: + span.set_attribute("agent_id", self.agent_id) + span.set_attribute("session_id", request.session_id) + span.set_attribute("request", request.model_dump_json()) + span.set_attribute("turn_id", request.turn_id) + if 
self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) await self._initialize_tools() async for chunk in self._run_turn(request): @@ -395,9 +399,12 @@ class ChatAgent(ShieldRunnerMixin): touchpoint: str, ) -> AsyncGenerator: async with tracing.span("run_shields") as span: - span.set_attribute("input", [m.model_dump_json() for m in messages]) + if self.telemetry_enabled and span is not None: + span.set_attribute("input", [m.model_dump_json() for m in messages]) + if len(shields) == 0: + span.set_attribute("output", "no shields") + if len(shields) == 0: - span.set_attribute("output", "no shields") return step_id = str(uuid.uuid4()) @@ -430,7 +437,8 @@ class ChatAgent(ShieldRunnerMixin): ) ) ) - span.set_attribute("output", e.violation.model_dump_json()) + if self.telemetry_enabled and span is not None: + span.set_attribute("output", e.violation.model_dump_json()) yield CompletionMessage( content=str(e), @@ -453,7 +461,8 @@ class ChatAgent(ShieldRunnerMixin): ) ) ) - span.set_attribute("output", "no violations") + if self.telemetry_enabled and span is not None: + span.set_attribute("output", "no violations") async def _run( self, @@ -518,8 +527,9 @@ class ChatAgent(ShieldRunnerMixin): stop_reason: StopReason | None = None async with tracing.span("inference") as span: - if self.agent_config.name: - span.set_attribute("agent_name", self.agent_config.name) + if self.telemetry_enabled and span is not None: + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) def _serialize_nested(value): """Recursively serialize nested Pydantic models to dicts.""" @@ -637,18 +647,19 @@ class ChatAgent(ShieldRunnerMixin): else: raise ValueError(f"Unexpected delta type {type(delta)}") - span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn) - span.set_attribute( - "input", - json.dumps([json.loads(m.model_dump_json()) for m in input_messages]), - ) - output_attr = json.dumps( - { - "content": content, - "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls], - } - ) - span.set_attribute("output", output_attr) + if self.telemetry_enabled and span is not None: + span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn) + span.set_attribute( + "input", + json.dumps([json.loads(m.model_dump_json()) for m in input_messages]), + ) + output_attr = json.dumps( + { + "content": content, + "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls], + } + ) + span.set_attribute("output", output_attr) n_iter += 1 await self.storage.set_num_infer_iters_in_turn(session_id, turn_id, n_iter) @@ -756,7 +767,9 @@ class ChatAgent(ShieldRunnerMixin): { "tool_name": tool_call.tool_name, "input": message.model_dump_json(), - }, + } + if self.telemetry_enabled + else {}, ) as span: tool_execution_start_time = datetime.now(UTC).isoformat() tool_result = await self.execute_tool_call_maybe( @@ -771,7 +784,8 @@ class ChatAgent(ShieldRunnerMixin): call_id=tool_call.call_id, content=tool_result.content, ) - span.set_attribute("output", result_message.model_dump_json()) + if self.telemetry_enabled and span is not None: + span.set_attribute("output", result_message.model_dump_json()) # Store tool execution step tool_execution_step = ToolExecutionStep( diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 5431e8f28..cfaf56a34 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ 
b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -64,6 +64,7 @@ class MetaReferenceAgentsImpl(Agents): tool_runtime_api: ToolRuntime, tool_groups_api: ToolGroups, policy: list[AccessRule], + telemetry_enabled: bool = False, ): self.config = config self.inference_api = inference_api @@ -71,6 +72,7 @@ class MetaReferenceAgentsImpl(Agents): self.safety_api = safety_api self.tool_runtime_api = tool_runtime_api self.tool_groups_api = tool_groups_api + self.telemetry_enabled = telemetry_enabled self.in_memory_store = InmemoryKVStoreImpl() self.openai_responses_impl: OpenAIResponsesImpl | None = None @@ -135,6 +137,7 @@ class MetaReferenceAgentsImpl(Agents): ), created_at=agent_info.created_at, policy=self.policy, + telemetry_enabled=self.telemetry_enabled, ) async def create_agent_session( diff --git a/llama_stack/providers/registry/agents.py b/llama_stack/providers/registry/agents.py index 57110d129..bc46b4de2 100644 --- a/llama_stack/providers/registry/agents.py +++ b/llama_stack/providers/registry/agents.py @@ -36,6 +36,9 @@ def available_providers() -> list[ProviderSpec]: Api.tool_runtime, Api.tool_groups, ], + optional_api_dependencies=[ + Api.telemetry, + ], description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.", ), ] From d5b136ac6660e457db044b98fef0e997f5f978b4 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Tue, 7 Oct 2025 14:00:56 -0400 Subject: [PATCH 14/14] feat: Enabling Annotations in Responses (#3698) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Implements annotations for `file_search` tool. Also adds some logs and tests. ## How does this work? 1. **Citation Markers**: Models insert `<|file-id|>` tokens during generation with instructions from search results 2. **Post-Processing**: Extract markers using regex to calculate character positions and create `AnnotationFileCitation` objects 3. **File Mapping**: Store filename metadata during vector store operations for proper citation display ## Example This is the updated `quickstart.py` script, which uses the `extra_body` to register the embedding model. ```python import io, requests from openai import OpenAI url="https://www.paulgraham.com/greatwork.html" model = "gpt-4o-mini" client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none") vs = client.vector_stores.create( name="my_citations_db", extra_body={ "embedding_model": "ollama/nomic-embed-text:latest", "embedding_dimension": 768, } ) response = requests.get(url) pseudo_file = io.BytesIO(str(response.content).encode('utf-8')) file_id = client.files.create(file=(url, pseudo_file, "text/html"), purpose="assistants").id client.vector_stores.files.create(vector_store_id=vs.id, file_id=file_id) resp = client.responses.create( model=model, input="How do you do great work? Use our existing knowledge_search tool.", tools=[{"type": "file_search", "vector_store_ids": [vs.id]}], include=["file_search_call.results"], ) print(resp) ```
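
For illustration, the post-processing in step 2 could look roughly like the sketch below. The helper name `extract_citations`, the exact marker regex, and the `file_id_to_filename` mapping are assumptions made for exposition, not the code this PR adds:

```python
import re

# Sketch of the citation post-processing idea from this PR's description.
# The <|file-id|> marker format comes from the text above; everything else
# (names, regex, dict-shaped annotations) is an illustrative assumption.
MARKER = re.compile(r"<\|(?P<file_id>[^|>]+)\|>")


def extract_citations(text: str, file_id_to_filename: dict[str, str]) -> tuple[str, list[dict]]:
    """Strip <|file-id|> markers from `text` and compute citation positions."""
    annotations: list[dict] = []
    pieces: list[str] = []  # cleaned text accumulated so far
    clean_len = 0
    last = 0
    for m in MARKER.finditer(text):
        piece = text[last : m.start()]
        pieces.append(piece)
        clean_len += len(piece)
        file_id = m.group("file_id")
        annotations.append(
            {
                "type": "file_citation",
                "file_id": file_id,
                # filename is looked up from metadata stored when the file was
                # added to the vector store (step 3 above)
                "filename": file_id_to_filename.get(file_id, file_id),
                "index": clean_len,  # character position in the cleaned text
            }
        )
        last = m.end()
    pieces.append(text[last:])
    return "".join(pieces), annotations
```

Applied to model output containing markers such as `<|file-abc|>`, this returns the cleaned text plus citation records whose `index` values are character positions in that text, which is the shape of the `AnnotationFileCitation` entries visible in the response below.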
Example of the full response

```python
INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/vector_stores "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/files "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/vector_stores/vs_0f6f7e35-f48b-4850-8604-8117d9a50e0a/files "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/responses "HTTP/1.1 200 OK"
Response(id='resp-28f5793d-3272-4de3-81f6-8cbf107d5bcd', created_at=1759797954.0, error=None, incomplete_details=None, instructions=None, metadata=None, model='gpt-4o-mini', object='response', output=[ResponseFileSearchToolCall(id='call_xWtvEQETN5GNiRLLiBIDKntg', queries=['how to do great work tips'], status='completed', type='file_search_call', results=[Result(attributes={}, file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='file-a98ada68681c4fbeba2201e9c7213fc3', score=1.3722624322210302, text='\\\'re looking where few have looked before.

One sign that you\\\'re suited for some kind of work is when you like\\neven the parts that other people find tedious or frightening.

But fields aren\\\'t people; you don\\\'t owe them any loyalty. If in the\\ncourse of working on one thing you discover another that\\\'s more\\nexciting, don\\\'t be afraid to switch.

If you\\\'re making something for people, make sure it\\\'s something\\nthey actually want. The best way to do this is to make something\\nyou yourself want. Write the story you want to read; build the tool\\nyou want to use. Since your friends probably have similar interests,\\nthis will also get you your initial audience.

This should follow from the excitingness rule. Obviously the most\\nexciting story to write will be the one you want to read. The reason\\nI mention this case explicitly is that so many people get it wrong.\\nInstead of making what they want, they try to make what some\\nimaginary, more sophisticated audience wants. And once you go down\\nthat route, you\\\'re lost.\\n[6]

There are a lot of forces that will lead you astray when you\\\'re\\ntrying to figure out what to work on. Pretentiousness, fashion,\\nfear, money, politics, other people\\\'s wishes, eminent frauds. But\\nif you stick to what you find genuinely interesting, you\\\'ll be proof\\nagainst all of them. If you\\\'re interested, you\\\'re not astray.





\\nFollowing your interests may sound like a rather passive strategy,\\nbut in practice it usually means following them past all sorts of\\nobstacles. You usually have to risk rejection and failure. So it\\ndoes take a good deal of boldness.

But while you need boldness, you don\\\'t usually need much planning.\\nIn most cases the recipe for doing great work is simply: work hard\\non excitingly ambitious projects, and something good will come of\\nit. Instead of making a plan and then executing it, you just try\\nto preserve certain invariants.

The trouble with planning is that it only works for achievements\\nyou can describe in advance. You can win a gold medal or get rich\\nby deciding to as a child and then tenaciously pursuing that goal,\\nbut you can\\\'t discover natural selection that way.

I think for most people who want to do great work, the right strategy\\nis not to plan too much. At each stage do whatever seems most\\ninteresting and gives you the best options for the future. I call\\nthis approach "staying upwind." This is how most people who\\\'ve done\\ngreat work seem to have done it.





\\nEven when you\\\'ve found something exciting to work on, working on\\nit is not always straightforward. There will be times when some new\\nidea makes you leap out of bed in the morning and get straight to\\nwork. But there will also be plenty of times when things aren\\\'t\\nlike that.

You don\\\'t just put out your sail and get blown forward by inspiration.\\nThere are headwinds and currents and hidden shoals. So there\\\'s a\\ntechnique to working, just as there is to sailing.

For example, while you must work hard, it\\\'s possible to work too\\nhard, and if'), Result(attributes={}, file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='file-a98ada68681c4fbeba2201e9c7213fc3', score=1.2532794607643494, text=' with anyone who\\\'s genuinely interested. If they\\\'re\\nreally good at their work, then they probably have a hobbyist\\\'s\\ninterest in it, and hobbyists always want to talk about their\\nhobbies.

It may take some effort to find the people who are really good,\\nthough. Doing great work has such prestige that in some places,\\nparticularly universities, there\\\'s a polite fiction that everyone\\nis engaged in it. And that is far from true. People within universities\\ncan\\\'t say so openly, but the quality of the work being done in\\ndifferent departments varies immensely. Some departments have people\\ndoing great work; others have in the past; others never have.





\\nSeek out the best colleagues. There are a lot of projects that can\\\'t\\nbe done alone, and even if you\\\'re working on one that can be, it\\\'s\\ngood to have other people to encourage you and to bounce ideas off.

Colleagues don\\\'t just affect your work, though; they also affect\\nyou. So work with people you want to become like, because you will.

Quality is more important than quantity in colleagues. It\\\'s better\\nto have one or two great ones than a building full of pretty good\\nones. In fact it\\\'s not merely better, but necessary, judging from\\nhistory: the degree to which great work happens in clusters suggests\\nthat one\\\'s colleagues often make the difference between doing great\\nwork and not.

How do you know when you have sufficiently good colleagues? In my\\nexperience, when you do, you know. Which means if you\\\'re unsure,\\nyou probably don\\\'t. But it may be possible to give a more concrete\\nanswer than that. Here\\\'s an attempt: sufficiently good colleagues\\noffer surprising insights. They can see and do things that you\\ncan\\\'t. So if you have a handful of colleagues good enough to keep\\nyou on your toes in this sense, you\\\'re probably over the threshold.

Most of us can benefit from collaborating with colleagues, but some\\nprojects require people on a larger scale, and starting one of those\\nis not for everyone. If you want to run a project like that, you\\\'ll\\nhave to become a manager, and managing well takes aptitude and\\ninterest like any other kind of work. If you don\\\'t have them, there\\nis no middle path: you must either force yourself to learn management\\nas a second language, or avoid such projects.\\n[27]





\\nHusband your morale. It\\\'s the basis of everything when you\\\'re working\\non ambitious projects. You have to nurture and protect it like a\\nliving organism.

Morale starts with your view of life. You\\\'re more likely to do great\\nwork if you\\\'re an optimist, and more likely to if you think of\\nyourself as lucky than if you think of yourself as a victim.

Indeed, work can to some extent protect you from your problems. If\\nyou choose work that\\\'s pure, its very difficulties will serve as a\\nrefuge from the difficulties of everyday life. If this is escapism,\\nit\\\'s a very productive form of it, and one that has been used by\\nsome of the greatest minds in history.

Morale compounds via work: high morale helps you do good work, which\\nincreases your morale and helps you do even'), Result(attributes={}, file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='file-a98ada68681c4fbeba2201e9c7213fc3', score=1.1973485818164222, text=' your\\nability and interest can take you. And you can only answer that by\\ntrying.

Many more people could try to do great work than do. What holds\\nthem back is a combination of modesty and fear. It seems presumptuous\\nto try to be Newton or Shakespeare. It also seems hard; surely if\\nyou tried something like that, you\\\'d fail. Presumably the calculation\\nis rarely explicit. Few people consciously decide not to try to do\\ngreat work. But that\\\'s what\\\'s going on subconsciously; they shy\\naway from the question.

So I\\\'m going to pull a sneaky trick on you. Do you want to do great\\nwork, or not? Now you have to decide consciously. Sorry about that.\\nI wouldn\\\'t have done it to a general audience. But we already know\\nyou\\\'re interested.

Don\\\'t worry about being presumptuous. You don\\\'t have to tell anyone.\\nAnd if it\\\'s too hard and you fail, so what? Lots of people have\\nworse problems than that. In fact you\\\'ll be lucky if it\\\'s the worst\\nproblem you have.

Yes, you\\\'ll have to work hard. But again, lots of people have to\\nwork hard. And if you\\\'re working on something you find very\\ninteresting, which you necessarily will if you\\\'re on the right path,\\nthe work will probably feel less burdensome than a lot of your\\npeers\\\'.

The discoveries are out there, waiting to be made. Why not by you?









\\nNotes

[1]\\nI don\\\'t think you could give a precise definition of what\\ncounts as great work. Doing great work means doing something important\\nso well that you expand people\\\'s ideas of what\\\'s possible. But\\nthere\\\'s no threshold for importance. It\\\'s a matter of degree, and\\noften hard to judge at the time anyway. So I\\\'d rather people focused\\non developing their interests rather than worrying about whether\\nthey\\\'re important or not. Just try to do something amazing, and\\nleave it to future generations to say if you succeeded.

[2]\\nA lot of standup comedy is based on noticing anomalies in\\neveryday life. "Did you ever notice...?" New ideas come from doing\\nthis about nontrivial things. Which may help explain why people\\\'s\\nreaction to a new idea is often the first half of laughing: Ha!

[3]\\nThat second qualifier is critical. If you\\\'re excited about\\nsomething most authorities discount, but you can\\\'t give a more\\nprecise explanation than "they don\\\'t get it," then you\\\'re starting\\nto drift into the territory of cranks.

[4]\\nFinding something to work on is not simply a matter of finding\\na match between the current version of you and a list of known\\nproblems. You\\\'ll often have to coevolve with the problem. That\\\'s\\nwhy it can sometimes be so hard to figure out what to work on. The\\nsearch space is huge. It\\\'s the cartesian product of all possible\\nt'), Result(attributes={}, file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='file-a98ada68681c4fbeba2201e9c7213fc3', score=1.1764591706535943, text='\\noptimistic, and even though one of the sources of their optimism\\nis ignorance, in this case ignorance can sometimes beat knowledge.

Try to finish what you start, though, even if it turns out to be\\nmore work than you expected. Finishing things is not just an exercise\\nin tidiness or self-discipline. In many projects a lot of the best\\nwork happens in what was meant to be the final stage.

Another permissible lie is to exaggerate the importance of what\\nyou\\\'re working on, at least in your own mind. If that helps you\\ndiscover something new, it may turn out not to have been a lie after\\nall.\\n[7]





\\nSince there are two senses of starting work — per day and per\\nproject — there are also two forms of procrastination. Per-project\\nprocrastination is far the more dangerous. You put off starting\\nthat ambitious project from year to year because the time isn\\\'t\\nquite right. When you\\\'re procrastinating in units of years, you can\\nget a lot not done.\\n[8]

One reason per-project procrastination is so dangerous is that it\\nusually camouflages itself as work. You\\\'re not just sitting around\\ndoing nothing; you\\\'re working industriously on something else. So\\nper-project procrastination doesn\\\'t set off the alarms that per-day\\nprocrastination does. You\\\'re too busy to notice it.

The way to beat it is to stop occasionally and ask yourself: Am I\\nworking on what I most want to work on? When you\\\'re young it\\\'s ok\\nif the answer is sometimes no, but this gets increasingly dangerous\\nas you get older.\\n[9]





\\nGreat work usually entails spending what would seem to most people\\nan unreasonable amount of time on a problem. You can\\\'t think of\\nthis time as a cost, or it will seem too high. You have to find the\\nwork sufficiently engaging as it\\\'s happening.

There may be some jobs where you have to work diligently for years\\nat things you hate before you get to the good part, but this is not\\nhow great work happens. Great work happens by focusing consistently\\non something you\\\'re genuinely interested in. When you pause to take\\nstock, you\\\'re surprised how far you\\\'ve come.

The reason we\\\'re surprised is that we underestimate the cumulative\\neffect of work. Writing a page a day doesn\\\'t sound like much, but\\nif you do it every day you\\\'ll write a book a year. That\\\'s the key:\\nconsistency. People who do great things don\\\'t get a lot done every\\nday. They get something done, rather than nothing.

If you do work that compounds, you\\\'ll get exponential growth. Most\\npeople who do this do it unconsciously, but it\\\'s worth stopping to\\nthink about. Learning, for example, is an instance of this phenomenon:\\nthe more you learn about something, the easier it is to learn more.\\nGrowing an audience is another: the more fans you have, the more\\nnew fans they\\\'ll bring you.

'), Result(attributes={}, file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='file-a98ada68681c4fbeba2201e9c7213fc3', score=1.174069664815369, text='\\ninside.





Let\\\'s talk a little more about the complicated business of figuring\\nout what to work on. The main reason it\\\'s hard is that you can\\\'t\\ntell what most kinds of work are like except by doing them. Which\\nmeans the four steps overlap: you may have to work at something for\\nyears before you know how much you like it or how good you are at\\nit. And in the meantime you\\\'re not doing, and thus not learning\\nabout, most other kinds of work. So in the worst case you choose\\nlate based on very incomplete information.\\n[4]

The nature of ambition exacerbates this problem. Ambition comes in\\ntwo forms, one that precedes interest in the subject and one that\\ngrows out of it. Most people who do great work have a mix, and the\\nmore you have of the former, the harder it will be to decide what\\nto do.

The educational systems in most countries pretend it\\\'s easy. They\\nexpect you to commit to a field long before you could know what\\nit\\\'s really like. And as a result an ambitious person on an optimal\\ntrajectory will often read to the system as an instance of breakage.

It would be better if they at least admitted it — if they admitted\\nthat the system not only can\\\'t do much to help you figure out what\\nto work on, but is designed on the assumption that you\\\'ll somehow\\nmagically guess as a teenager. They don\\\'t tell you, but I will:\\nwhen it comes to figuring out what to work on, you\\\'re on your own.\\nSome people get lucky and do guess correctly, but the rest will\\nfind themselves scrambling diagonally across tracks laid down on\\nthe assumption that everyone does.

What should you do if you\\\'re young and ambitious but don\\\'t know\\nwhat to work on? What you should not do is drift along passively,\\nassuming the problem will solve itself. You need to take action.\\nBut there is no systematic procedure you can follow. When you read\\nbiographies of people who\\\'ve done great work, it\\\'s remarkable how\\nmuch luck is involved. They discover what to work on as a result\\nof a chance meeting, or by reading a book they happen to pick up.\\nSo you need to make yourself a big target for luck, and the way to\\ndo that is to be curious. Try lots of things, meet lots of people,\\nread lots of books, ask lots of questions.\\n[5]

When in doubt, optimize for interestingness. Fields change as you\\nlearn more about them. What mathematicians do, for example, is very\\ndifferent from what you do in high school math classes. So you need\\nto give different types of work a chance to show you what they\\\'re\\nlike. But a field should become increasingly interesting as you\\nlearn more about it. If it doesn\\\'t, it\\\'s probably not for you.

Don\\\'t worry if you find you\\\'re interested in different things than\\nother people. The stranger your tastes in interestingness, the\\nbetter. Strange tastes are often strong ones, and a strong taste\\nfor work means you\\\'ll be productive. And you\\\'re more likely to find\\nnew things if you'), Result(attributes={}, file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='file-a98ada68681c4fbeba2201e9c7213fc3', score=1.158095578895721, text='. Don\\\'t copy the manner of\\nan eminent 50 year old professor if you\\\'re 18, for example, or the\\nidiom of a Renaissance poem hundreds of years later.

Some of the features of things you admire are flaws they succeeded\\ndespite. Indeed, the features that are easiest to imitate are the\\nmost likely to be the flaws.

This is particularly true for behavior. Some talented people are\\njerks, and this sometimes makes it seem to the inexperienced that\\nbeing a jerk is part of being talented. It isn\\\'t; being talented\\nis merely how they get away with it.

One of the most powerful kinds of copying is to copy something from\\none field into another. History is so full of chance discoveries\\nof this type that it\\\'s probably worth giving chance a hand by\\ndeliberately learning about other kinds of work. You can take ideas\\nfrom quite distant fields if you let them be metaphors.

Negative examples can be as inspiring as positive ones. In fact you\\ncan sometimes learn more from things done badly than from things\\ndone well; sometimes it only becomes clear what\\\'s needed when it\\\'s\\nmissing.





\\nIf a lot of the best people in your field are collected in one\\nplace, it\\\'s usually a good idea to visit for a while. It will\\nincrease your ambition, and also, by showing you that these people\\nare human, increase your self-confidence.\\n[26]

If you\\\'re earnest you\\\'ll probably get a warmer welcome than you\\nmight expect. Most people who are very good at something are happy\\nto talk about it with anyone who\\\'s genuinely interested. If they\\\'re\\nreally good at their work, then they probably have a hobbyist\\\'s\\ninterest in it, and hobbyists always want to talk about their\\nhobbies.

It may take some effort to find the people who are really good,\\nthough. Doing great work has such prestige that in some places,\\nparticularly universities, there\\\'s a polite fiction that everyone\\nis engaged in it. And that is far from true. People within universities\\ncan\\\'t say so openly, but the quality of the work being done in\\ndifferent departments varies immensely. Some departments have people\\ndoing great work; others have in the past; others never have.





\\nSeek out the best colleagues. There are a lot of projects that can\\\'t\\nbe done alone, and even if you\\\'re working on one that can be, it\\\'s\\ngood to have other people to encourage you and to bounce ideas off.

Colleagues don\\\'t just affect your work, though; they also affect\\nyou. So work with people you want to become like, because you will.

Quality is more important than quantity in colleagues. It\\\'s better\\nto have one or two great ones than a building full of pretty good\\nones. In fact it\\\'s not merely better, but necessary, judging from\\nhistory: the degree to which great work happens in clusters suggests\\nthat one\\\'s colleagues often make the difference between doing great\\nwork and not.

How do you know when you have sufficiently good colleagues? In my\\nexperience, when you do, you know. Which means if you\\\'re unsure,\\nyou probably don\\\'t. But it may be possible to give a more concrete\\nanswer than that. Here\\\'s an attempt: sufficiently good'), Result(attributes={}, file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='file-a98ada68681c4fbeba2201e9c7213fc3', score=1.1566747762241967, text=',\\nbut in practice it usually means following them past all sorts of\\nobstacles. You usually have to risk rejection and failure. So it\\ndoes take a good deal of boldness.

But while you need boldness, you don\\\'t usually need much planning.\\nIn most cases the recipe for doing great work is simply: work hard\\non excitingly ambitious projects, and something good will come of\\nit. Instead of making a plan and then executing it, you just try\\nto preserve certain invariants.

The trouble with planning is that it only works for achievements\\nyou can describe in advance. You can win a gold medal or get rich\\nby deciding to as a child and then tenaciously pursuing that goal,\\nbut you can\\\'t discover natural selection that way.

I think for most people who want to do great work, the right strategy\\nis not to plan too much. At each stage do whatever seems most\\ninteresting and gives you the best options for the future. I call\\nthis approach "staying upwind." This is how most people who\\\'ve done\\ngreat work seem to have done it.





\\nEven when you\\\'ve found something exciting to work on, working on\\nit is not always straightforward. There will be times when some new\\nidea makes you leap out of bed in the morning and get straight to\\nwork. But there will also be plenty of times when things aren\\\'t\\nlike that.

You don\\\'t just put out your sail and get blown forward by inspiration.\\nThere are headwinds and currents and hidden shoals. So there\\\'s a\\ntechnique to working, just as there is to sailing.

For example, while you must work hard, it\\\'s possible to work too\\nhard, and if you do that you\\\'ll find you get diminishing returns:\\nfatigue will make you stupid, and eventually even damage your health.\\nThe point at which work yields diminishing returns depends on the\\ntype. Some of the hardest types you might only be able to do for\\nfour or five hours a day.

Ideally those hours will be contiguous. To the extent you can, try\\nto arrange your life so you have big blocks of time to work in.\\nYou\\\'ll shy away from hard tasks if you know you might be interrupted.

It will probably be harder to start working than to keep working.\\nYou\\\'ll often have to trick yourself to get over that initial\\nthreshold. Don\\\'t worry about this; it\\\'s the nature of work, not a\\nflaw in your character. Work has a sort of activation energy, both\\nper day and per project. And since this threshold is fake in the\\nsense that it\\\'s higher than the energy required to keep going, it\\\'s\\nok to tell yourself a lie of corresponding magnitude to get over\\nit.

It\\\'s usually a mistake to lie to yourself if you want to do great\\nwork, but this is one of the rare cases where it isn\\\'t. When I\\\'m\\nreluctant to start work in the morning, I often trick myself by\\nsaying "I\\\'ll just read over what I\\\'ve got so far." Five minutes\\nlater I\\\'ve found something that seems mistaken or incomplete, and\\nI\\\'m off.

Similar techniques work for starting new projects. It\\\'s ok to lie\\nto yourself about how much work a project will entail, for example.\\nLots of great things began with someone saying "How hard could it\\nbe?"

This is one case where the young have an advantage. They\\\'re more'), Result(attributes={}, file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='file-a98ada68681c4fbeba2201e9c7213fc3', score=1.1349744395573516, text=' audience\\nin the traditional sense. Either way it doesn\\\'t need to be big.\\nThe value of an audience doesn\\\'t grow anything like linearly with\\nits size. Which is bad news if you\\\'re famous, but good news if\\nyou\\\'re just starting out, because it means a small but dedicated\\naudience can be enough to sustain you. If a handful of people\\ngenuinely love what you\\\'re doing, that\\\'s enough.

To the extent you can, avoid letting intermediaries come between\\nyou and your audience. In some types of work this is inevitable,\\nbut it\\\'s so liberating to escape it that you might be better off\\nswitching to an adjacent type if that will let you go direct.\\n[28]

The people you spend time with will also have a big effect on your\\nmorale. You\\\'ll find there are some who increase your energy and\\nothers who decrease it, and the effect someone has is not always\\nwhat you\\\'d expect. Seek out the people who increase your energy and\\navoid those who decrease it. Though of course if there\\\'s someone\\nyou need to take care of, that takes precedence.

Don\\\'t marry someone who doesn\\\'t understand that you need to work,\\nor sees your work as competition for your attention. If you\\\'re\\nambitious, you need to work; it\\\'s almost like a medical condition;\\nso someone who won\\\'t let you work either doesn\\\'t understand you,\\nor does and doesn\\\'t care.

Ultimately morale is physical. You think with your body, so it\\\'s\\nimportant to take care of it. That means exercising regularly,\\neating and sleeping well, and avoiding the more dangerous kinds of\\ndrugs. Running and walking are particularly good forms of exercise\\nbecause they\\\'re good for thinking.\\n[29]

People who do great work are not necessarily happier than everyone\\nelse, but they\\\'re happier than they\\\'d be if they didn\\\'t. In fact,\\nif you\\\'re smart and ambitious, it\\\'s dangerous not to be productive.\\nPeople who are smart and ambitious but don\\\'t achieve much tend to\\nbecome bitter.





\\nIt\\\'s ok to want to impress other people, but choose the right people.\\nThe opinion of people you respect is signal. Fame, which is the\\nopinion of a much larger group you might or might not respect, just\\nadds noise.

The prestige of a type of work is at best a trailing indicator and\\nsometimes completely mistaken. If you do anything well enough,\\nyou\\\'ll make it prestigious. So the question to ask about a type of\\nwork is not how much prestige it has, but how well it could be done.

Competition can be an effective motivator, but don\\\'t let it choose\\nthe problem for you; don\\\'t let yourself get drawn into chasing\\nsomething just because others are. In fact, don\\\'t let competitors\\nmake you do anything much more specific than work harder.

Curiosity is the best guide. Your curiosity never lies, and it knows\\nmore than you do about what\\\'s worth paying attention to.





\\nNotice how often that word has come up. If you asked an oracle the\\nsecret to doing great work and the oracle replied'), Result(attributes={}, file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='file-a98ada68681c4fbeba2201e9c7213fc3', score=1.123214818076958, text='b\'How to Do Great Work\\n\\n


How to Do Great Work

July 2023

If you collected lists of techniques for doing great work in a lot\\nof different fields, what would the intersection look like? I decided\\nto find out'), Result(attributes={}, file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='file-a98ada68681c4fbeba2201e9c7213fc3', score=1.1193194369249235, text=' dangerous kinds of\\ndrugs. Running and walking are particularly good forms of exercise\\nbecause they\\\'re good for thinking.\\n[29]

People who do great work are not necessarily happier than everyone\\nelse, but they\\\'re happier than they\\\'d be if they didn\\\'t. In fact,\\nif you\\\'re smart and ambitious, it\\\'s dangerous not to be productive.\\nPeople who are smart and ambitious but don\\\'t achieve much tend to\\nbecome bitter.





\\nIt\\\'s ok to want to impress other people, but choose the right people.\\nThe opinion of people you respect is signal. Fame, which is the\\nopinion of a much larger group you might or might not respect, just\\nadds noise.

The prestige of a type of work is at best a trailing indicator and\\nsometimes completely mistaken. If you do anything well enough,\\nyou\\\'ll make it prestigious. So the question to ask about a type of\\nwork is not how much prestige it has, but how well it could be done.

Competition can be an effective motivator, but don\\\'t let it choose\\nthe problem for you; don\\\'t let yourself get drawn into chasing\\nsomething just because others are. In fact, don\\\'t let competitors\\nmake you do anything much more specific than work harder.

Curiosity is the best guide. Your curiosity never lies, and it knows\\nmore than you do about what\\\'s worth paying attention to.





\\nNotice how often that word has come up. If you asked an oracle the\\nsecret to doing great work and the oracle replied with a single\\nword, my bet would be on "curiosity."

That doesn\\\'t translate directly to advice. It\\\'s not enough just to\\nbe curious, and you can\\\'t command curiosity anyway. But you can\\nnurture it and let it drive you.

Curiosity is the key to all four steps in doing great work: it will\\nchoose the field for you, get you to the frontier, cause you to\\nnotice the gaps in it, and drive you to explore them. The whole\\nprocess is a kind of dance with curiosity.





\\nBelieve it or not, I tried to make this essay as short as I could.\\nBut its length at least means it acts as a filter. If you made it\\nthis far, you must be interested in doing great work. And if so\\nyou\\\'re already further along than you might realize, because the\\nset of people willing to want to is small.

The factors in doing great work are factors in the literal,\\nmathematical sense, and they are: ability, interest, effort, and\\nluck. Luck by definition you can\\\'t do anything about, so we can\\nignore that. And we can assume effort, if you do in fact want to\\ndo great work. So the problem boils down to ability and interest.\\nCan you find a kind of work where your ability and interest will\\ncombine to yield an explosion of new ideas?

Here there are grounds for optimism. There are so many different\\nways to do great work, and even more that are still undiscovered.\\nOut of all those different types of work, the one you\\\'re most suited\\nfor is probably a pretty close match. Probably a comically close\\nmatch. It\\\'s just a question of finding it, and how far into it')]), ResponseOutputMessage(id='msg_3591ea71-8b35-4efd-a5ad-c1c250801971', content=[ResponseOutputText(annotations=[AnnotationFileCitation(file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='https://www.paulgraham.com/greatwork.html', index=361, type='file_citation'), AnnotationFileCitation(file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='https://www.paulgraham.com/greatwork.html', index=676, type='file_citation'), AnnotationFileCitation(file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='https://www.paulgraham.com/greatwork.html', index=948, type='file_citation'), AnnotationFileCitation(file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='https://www.paulgraham.com/greatwork.html', index=1259, type='file_citation'), AnnotationFileCitation(file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='https://www.paulgraham.com/greatwork.html', index=1520, type='file_citation'), AnnotationFileCitation(file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='https://www.paulgraham.com/greatwork.html', index=1747, type='file_citation')], text='To do great work, consider the following principles:\n\n1. **Follow Your Interests**: Engage in work that genuinely excites you. If you find an area intriguing, pursue it without being overly concerned about external pressures or norms. You should create things that you would want for yourself, as this often aligns with what others in your circle might want too.\n\n2. **Work Hard on Ambitious Projects**: Ambition is vital, but it should be tempered by genuine interest. Instead of detailed planning for the future, focus on exciting projects that keep your options open. This approach, known as "staying upwind," allows for adaptability and can lead to unforeseen achievements.\n\n3. **Choose Quality Colleagues**: Collaborating with talented colleagues can significantly affect your own work. Seek out individuals who offer surprising insights and whom you admire. The presence of good colleagues can elevate the quality of your work and inspire you.\n\n4. **Maintain High Morale**: Your attitude towards work and life affects your performance. Cultivating optimism and viewing yourself as lucky rather than victimized can boost your productivity. It’s essential to care for your physical health as well since it directly impacts your mental faculties and morale.\n\n5. **Be Consistent**: Great work often comes from cumulative effort. Daily progress, even in small amounts, can result in substantial achievements over time. Emphasize consistency and make the work engaging, as this reduces the perceived burden of hard labor.\n\n6. **Embrace Curiosity**: Curiosity is a driving force that can guide you in selecting fields of interest, pushing you to explore uncharted territories. 
Allow it to shape your work and continually seek knowledge and insights.\n\nBy focusing on these aspects, you can create an environment conducive to great work and personal fulfillment.', type='output_text', logprobs=None)], role='assistant', status='completed', type='message')], parallel_tool_calls=False, temperature=None, tool_choice=None, tools=None, top_p=None, background=None, conversation=None, max_output_tokens=None, max_tool_calls=None, previous_response_id=None, prompt=None, prompt_cache_key=None, reasoning=None, safety_identifier=None, service_tier=None, status='completed', text=ResponseTextConfig(format=ResponseFormatText(type='text'), verbosity=None), top_logprobs=None, truncation=None, usage=None, user=None) In [34]: resp.output[1].content[0].text Out[34]: 'To do great work, consider the following principles:\n\n1. **Follow Your Interests**: Engage in work that genuinely excites you. If you find an area intriguing, pursue it without being overly concerned about external pressures or norms. You should create things that you would want for yourself, as this often aligns with what others in your circle might want too.\n\n2. **Work Hard on Ambitious Projects**: Ambition is vital, but it should be tempered by genuine interest. Instead of detailed planning for the future, focus on exciting projects that keep your options open. This approach, known as "staying upwind," allows for adaptability and can lead to unforeseen achievements.\n\n3. **Choose Quality Colleagues**: Collaborating with talented colleagues can significantly affect your own work. Seek out individuals who offer surprising insights and whom you admire. The presence of good colleagues can elevate the quality of your work and inspire you.\n\n4. **Maintain High Morale**: Your attitude towards work and life affects your performance. Cultivating optimism and viewing yourself as lucky rather than victimized can boost your productivity. It’s essential to care for your physical health as well since it directly impacts your mental faculties and morale.\n\n5. **Be Consistent**: Great work often comes from cumulative effort. Daily progress, even in small amounts, can result in substantial achievements over time. Emphasize consistency and make the work engaging, as this reduces the perceived burden of hard labor.\n\n6. **Embrace Curiosity**: Curiosity is a driving force that can guide you in selecting fields of interest, pushing you to explore uncharted territories. Allow it to shape your work and continually seek knowledge and insights.\n\nBy focusing on these aspects, you can create an environment conducive to great work and personal fulfillment.' 
```

The relevant output looks like this:

```python
resp.output[1].content[0].annotations
[AnnotationFileCitation(file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='https://www.paulgraham.com/greatwork.html', index=361, type='file_citation'),
 AnnotationFileCitation(file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='https://www.paulgraham.com/greatwork.html', index=676, type='file_citation'),
 AnnotationFileCitation(file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='https://www.paulgraham.com/greatwork.html', index=948, type='file_citation'),
 AnnotationFileCitation(file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='https://www.paulgraham.com/greatwork.html', index=1259, type='file_citation'),
 AnnotationFileCitation(file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='https://www.paulgraham.com/greatwork.html', index=1520, type='file_citation'),
 AnnotationFileCitation(file_id='file-a98ada68681c4fbeba2201e9c7213fc3', filename='https://www.paulgraham.com/greatwork.html', index=1747, type='file_citation')]
```

And

```python
In [144]: print(resp.output[1].content[0].text)
To do great work, consider the following principles:

1. **Follow Your Interests**: Engage in work that genuinely excites you. If you find an area intriguing, pursue it without being overly concerned about external pressures or norms. You should create things that you would want for yourself, as this often aligns with what others in your circle might want too.

2. **Work Hard on Ambitious Projects**: Ambition is vital, but it should be tempered by genuine interest. Instead of detailed planning for the future, focus on exciting projects that keep your options open. This approach, known as "staying upwind," allows for adaptability and can lead to unforeseen achievements.

3. **Choose Quality Colleagues**: Collaborating with talented colleagues can significantly affect your own work. Seek out individuals who offer surprising insights and whom you admire. The presence of good colleagues can elevate the quality of your work and inspire you.

4. **Maintain High Morale**: Your attitude towards work and life affects your performance. Cultivating optimism and viewing yourself as lucky rather than victimized can boost your productivity. It’s essential to care for your physical health as well since it directly impacts your mental faculties and morale.

5. **Be Consistent**: Great work often comes from cumulative effort. Daily progress, even in small amounts, can result in substantial achievements over time. Emphasize consistency and make the work engaging, as this reduces the perceived burden of hard labor.

6. **Embrace Curiosity**: Curiosity is a driving force that can guide you in selecting fields of interest, pushing you to explore uncharted territories. Allow it to shape your work and continually seek knowledge and insights.

By focusing on these aspects, you can create an environment conducive to great work and personal fulfillment.
```

And the code below outputs only periods, highlighting that the position/index behaves as expected: the annotation lands at the end of each cited sentence.

```python
print([resp.output[1].content[0].text[j.index] for j in resp.output[1].content[0].annotations])
Out[41]: ['.', '.', '.', '.', '.', '.']
```

## Test Plan

Unit tests added.
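As a further illustration (not part of the patch's test suite), the index arithmetic can be exercised in isolation. The sketch below condenses the `_extract_citations_from_text` logic from the diff that follows; the `FileCitation` dataclass stands in for the real `OpenAIResponseAnnotationFileCitation` model, and `extract_citations` is a hypothetical name used only here:

```python
# Minimal sketch of the marker-to-annotation conversion, assuming the
# <|file-id|> marker convention this patch introduces. FileCitation is a
# stand-in for OpenAIResponseAnnotationFileCitation.
import re
from dataclasses import dataclass


@dataclass
class FileCitation:
    file_id: str
    filename: str
    index: int  # offset in the cleaned text, i.e. the sentence-ending punctuation


MARKER = re.compile(r"<\|(?P<file_id>file-[A-Za-z0-9_-]+)\|>")


def extract_citations(text: str, files: dict[str, str]) -> tuple[list[FileCitation], str]:
    annotations, parts, pos, last = [], [], 0, 0
    for m in MARKER.finditer(text):
        prefix = text[last : m.start()]
        if prefix.endswith(" "):  # drop the single space that precedes the marker
            prefix = prefix[:-1]
        parts.append(prefix)
        pos += len(prefix)
        fid = m.group("file_id")
        if fid in files:
            annotations.append(FileCitation(fid, files[fid], pos))
        last = m.end()
    parts.append(text[last:])  # keep whatever follows the last marker
    return annotations, "".join(parts)


annotations, clean = extract_citations(
    "Citations land at sentence end <|file-abc123|>.", {"file-abc123": "doc1.pdf"}
)
assert clean == "Citations land at sentence end."
assert clean[annotations[0].index] == "."
```

Because the marker and its leading space are stripped before the offset is recorded, the recorded index points at the punctuation that closes the cited sentence, which is exactly what the new unit test asserts.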
---------

Signed-off-by: Francisco Javier Arceo
---
 .../meta_reference/responses/streaming.py     |  9 ++-
 .../meta_reference/responses/tool_executor.py | 44 ++++++++++++--
 .../agents/meta_reference/responses/types.py  |  1 +
 .../agents/meta_reference/responses/utils.py  | 57 ++++++++++++++++++-
 .../inline/tool_runtime/rag/memory.py         |  5 +-
 .../utils/memory/openai_vector_store_mixin.py |  7 ++-
 .../test_response_conversion_utils.py         | 25 ++++++++
 7 files changed, 136 insertions(+), 12 deletions(-)

diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 0bb524f5c..8a662e6db 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -97,6 +97,8 @@ class StreamingResponseOrchestrator:
         self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = {}
         # Track final messages after all tool executions
         self.final_messages: list[OpenAIMessageParam] = []
+        # mapping for annotations
+        self.citation_files: dict[str, str] = {}
 
     async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
         # Initialize output messages
@@ -126,6 +128,7 @@ class StreamingResponseOrchestrator:
             # Text is the default response format for chat completion so don't need to pass it
             # (some providers don't support non-empty response_format when tools are present)
             response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
+            logger.debug(f"calling openai_chat_completion with tools: {self.ctx.chat_tools}")
             completion_result = await self.inference_api.openai_chat_completion(
                 model=self.ctx.model,
                 messages=messages,
@@ -160,7 +163,7 @@ class StreamingResponseOrchestrator:
             # Handle choices with no tool calls
             for choice in current_response.choices:
                 if not (choice.message.tool_calls and self.ctx.response_tools):
-                    output_messages.append(await convert_chat_choice_to_response_message(choice))
+                    output_messages.append(await convert_chat_choice_to_response_message(choice, self.citation_files))
 
             # Execute tool calls and coordinate results
             async for stream_event in self._coordinate_tool_execution(
@@ -211,6 +214,8 @@ class StreamingResponseOrchestrator:
 
         for choice in current_response.choices:
             next_turn_messages.append(choice.message)
+            logger.debug(f"Choice message content: {choice.message.content}")
+            logger.debug(f"Choice message tool_calls: {choice.message.tool_calls}")
 
             if choice.message.tool_calls and self.ctx.response_tools:
                 for tool_call in choice.message.tool_calls:
@@ -470,6 +475,8 @@ class StreamingResponseOrchestrator:
                 tool_call_log = result.final_output_message
                 tool_response_message = result.final_input_message
                 self.sequence_number = result.sequence_number
+                if result.citation_files:
+                    self.citation_files.update(result.citation_files)
 
                 if tool_call_log:
                     output_messages.append(tool_call_log)
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
index b028c018b..b33b47454 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
@@ -94,7 +94,10 @@ class ToolExecutor:
 
         # Yield the final result
         yield ToolExecutionResult(
-            sequence_number=sequence_number, final_output_message=output_message, final_input_message=input_message
+            sequence_number=sequence_number,
+            final_output_message=output_message,
+            final_input_message=input_message,
+            citation_files=result.metadata.get("citation_files") if result and result.metadata else None,
         )
 
     async def _execute_knowledge_search_via_vector_store(
@@ -129,8 +132,6 @@ class ToolExecutor:
         for results in all_results:
             search_results.extend(results)
 
-        # Convert search results to tool result format matching memory.py
-        # Format the results as interleaved content similar to memory.py
        content_items = []
         content_items.append(
             TextContentItem(
@@ -138,27 +139,58 @@ class ToolExecutor:
             )
         )
 
+        unique_files = set()
         for i, result_item in enumerate(search_results):
             chunk_text = result_item.content[0].text if result_item.content else ""
-            metadata_text = f"document_id: {result_item.file_id}, score: {result_item.score}"
+            # Get file_id from attributes if result_item.file_id is empty
+            file_id = result_item.file_id or (
+                result_item.attributes.get("document_id") if result_item.attributes else None
+            )
+            metadata_text = f"document_id: {file_id}, score: {result_item.score}"
             if result_item.attributes:
                 metadata_text += f", attributes: {result_item.attributes}"
-            text_content = f"[{i + 1}] {metadata_text}\n{chunk_text}\n"
+
+            text_content = f"[{i + 1}] {metadata_text} (cite as <|{file_id}|>)\n{chunk_text}\n"
             content_items.append(TextContentItem(text=text_content))
+            unique_files.add(file_id)
 
         content_items.append(TextContentItem(text="END of knowledge_search tool results.\n"))
+
+        citation_instruction = ""
+        if unique_files:
+            citation_instruction = (
+                " Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). "
+                "Do not add extra punctuation. Use only the file IDs provided (do not invent new ones)."
+            )
+
         content_items.append(
             TextContentItem(
-                text=f'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.\n',
+                text=f'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{citation_instruction}\n',
             )
         )
 
+        # handling missing attributes for old versions
+        citation_files = {}
+        for result in search_results:
+            file_id = result.file_id
+            if not file_id and result.attributes:
+                file_id = result.attributes.get("document_id")
+
+            filename = result.filename
+            if not filename and result.attributes:
+                filename = result.attributes.get("filename")
+            if not filename:
+                filename = "unknown"
+
+            citation_files[file_id] = filename
+
         return ToolInvocationResult(
             content=content_items,
             metadata={
                 "document_ids": [r.file_id for r in search_results],
                 "chunks": [r.content[0].text if r.content else "" for r in search_results],
                 "scores": [r.score for r in search_results],
+                "citation_files": citation_files,
             },
         )
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/llama_stack/providers/inline/agents/meta_reference/responses/types.py
index d3b5a16bd..fd5f44242 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/types.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/types.py
@@ -27,6 +27,7 @@ class ToolExecutionResult(BaseModel):
     sequence_number: int
     final_output_message: OpenAIResponseOutput | None = None
     final_input_message: OpenAIMessageParam | None = None
+    citation_files: dict[str, str] | None = None
 
 
 @dataclass
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
index 310a88298..5b013b9c4 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
@@ -4,9 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import re
 import uuid
 
 from llama_stack.apis.agents.openai_responses import (
+    OpenAIResponseAnnotationFileCitation,
     OpenAIResponseInput,
     OpenAIResponseInputFunctionToolCallOutput,
     OpenAIResponseInputMessageContent,
@@ -45,7 +47,9 @@ from llama_stack.apis.inference import (
 )
 
 
-async def convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenAIResponseMessage:
+async def convert_chat_choice_to_response_message(
+    choice: OpenAIChoice, citation_files: dict[str, str] | None = None
+) -> OpenAIResponseMessage:
     """Convert an OpenAI Chat Completion choice into an OpenAI Response output message."""
     output_content = ""
     if isinstance(choice.message.content, str):
@@ -57,9 +61,11 @@ async def convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenAIResponseMessage:
             f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}"
         )
 
+    annotations, clean_text = _extract_citations_from_text(output_content, citation_files or {})
+
     return OpenAIResponseMessage(
         id=f"msg_{uuid.uuid4()}",
-        content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
+        content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=annotations)],
         status="completed",
         role="assistant",
     )
@@ -200,6 +206,53 @@ async def get_message_type_by_role(role: str):
     return role_to_type.get(role)
 
 
+def _extract_citations_from_text(
+    text: str, citation_files: dict[str, str]
+) -> tuple[list[OpenAIResponseAnnotationFileCitation], str]:
+    """Extract citation markers from text and create annotations
+
+    Args:
+        text: The text containing citation markers like [file-Cn3MSNn72ENTiiq11Qda4A]
+        citation_files: Dictionary mapping file_id to filename
+
+    Returns:
+        Tuple of (annotations_list, clean_text_without_markers)
+    """
+    file_id_regex = re.compile(r"<\|(?P<file_id>file-[A-Za-z0-9_-]+)\|>")
+
+    annotations = []
+    parts = []
+    total_len = 0
+    last_end = 0
+
+    for m in file_id_regex.finditer(text):
+        # segment before the marker
+        prefix = text[last_end : m.start()]
+
+        # drop one space if it exists (since marker is at sentence end)
+        if prefix.endswith(" "):
+            prefix = prefix[:-1]
+
+        parts.append(prefix)
+        total_len += len(prefix)
+
+        fid = m.group(1)
+        if fid in citation_files:
+            annotations.append(
+                OpenAIResponseAnnotationFileCitation(
+                    file_id=fid,
+                    filename=citation_files[fid],
+                    index=total_len,  # index points to punctuation
+                )
+            )
+
+        last_end = m.end()
+
+    parts.append(text[last_end:])
+    cleaned_text = "".join(parts)
+    return annotations, cleaned_text
+
+
 def is_function_tool_call(
     tool_call: OpenAIChatCompletionToolCall,
     tools: list[OpenAIResponseInputTool],
diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py
index c8499a9b8..aac86a056 100644
--- a/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -331,5 +331,8 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
 
         return ToolInvocationResult(
             content=result.content or [],
-            metadata=result.metadata,
+            metadata={
+                **(result.metadata or {}),
+                "citation_files": getattr(result, "citation_files", None),
+            },
         )
diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index 0d0aa25a4..97079c3b3 100644
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -587,7 +587,7 @@ class OpenAIVectorStoreMixin(ABC):
                 content = self._chunk_to_vector_store_content(chunk)
 
                 response_data_item = VectorStoreSearchResponse(
-                    file_id=chunk.metadata.get("file_id", ""),
+                    file_id=chunk.metadata.get("document_id", ""),
                     filename=chunk.metadata.get("filename", ""),
                     score=score,
                     attributes=chunk.metadata,
@@ -746,12 +746,15 @@ class OpenAIVectorStoreMixin(ABC):
             content = content_from_data_and_mime_type(content_response.body, mime_type)
 
+            chunk_attributes = attributes.copy()
+            chunk_attributes["filename"] = file_response.filename
+
             chunks = make_overlapped_chunks(
                 file_id,
                 content,
                 max_chunk_size_tokens,
                 chunk_overlap_tokens,
-                attributes,
+                chunk_attributes,
             )
 
             if not chunks:
                 vector_store_file_object.status = "failed"
diff --git a/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py
index 187540f82..2698b88c8 100644
--- a/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py
+++ b/tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py
@@ -8,6 +8,7 @@
 import pytest
 
 from llama_stack.apis.agents.openai_responses import (
+    OpenAIResponseAnnotationFileCitation,
     OpenAIResponseInputFunctionToolCallOutput,
     OpenAIResponseInputMessageContentImage,
     OpenAIResponseInputMessageContentText,
@@ -35,6 +36,7 @@ from llama_stack.apis.inference import (
     OpenAIUserMessageParam,
 )
 from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
+    _extract_citations_from_text,
     convert_chat_choice_to_response_message,
     convert_response_content_to_chat_content,
     convert_response_input_to_chat_messages,
@@ -340,3 +342,26 @@ class TestIsFunctionToolCall:
         result = is_function_tool_call(tool_call, tools)
 
         assert result is False
+
+
+class TestExtractCitationsFromText:
+    def test_extract_citations_and_annotations(self):
+        text = "Start [not-a-file]. New source <|file-abc123|>. "
+        text += "Other source <|file-def456|>? Repeat source <|file-abc123|>! No citation."
+        file_mapping = {"file-abc123": "doc1.pdf", "file-def456": "doc2.txt"}
+
+        annotations, cleaned_text = _extract_citations_from_text(text, file_mapping)
+
+        expected_annotations = [
+            OpenAIResponseAnnotationFileCitation(file_id="file-abc123", filename="doc1.pdf", index=30),
+            OpenAIResponseAnnotationFileCitation(file_id="file-def456", filename="doc2.txt", index=44),
+            OpenAIResponseAnnotationFileCitation(file_id="file-abc123", filename="doc1.pdf", index=59),
+        ]
+        expected_clean_text = "Start [not-a-file]. New source. Other source? Repeat source! No citation."
+
+        assert cleaned_text == expected_clean_text
+        assert annotations == expected_annotations
+        # OpenAI cites at the end of the sentence
+        assert cleaned_text[expected_annotations[0].index] == "."
+        assert cleaned_text[expected_annotations[1].index] == "?"
+        assert cleaned_text[expected_annotations[2].index] == "!"