Merge branch 'llamastack:main' into model_unregisteration_error_message

Author: Omar Abdelwahab
Date: 2025-10-07 11:08:46 -07:00 (committed by GitHub)
Commit: 1180626a22
103 changed files with 11265 additions and 704 deletions

View file

@ -1,4 +1,7 @@
---
description: "Files
This API is used to upload documents that can be used with other Llama Stack APIs."
sidebar_label: Files
title: Files
---
@ -7,4 +10,8 @@ title: Files
## Overview
Files
This API is used to upload documents that can be used with other Llama Stack APIs.
This section contains documentation for all available providers for the **files** API.
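
To make the upload flow concrete, here is a minimal sketch against the OpenAI-compatible Files endpoint this overview describes. The `base_url`, `api_key`, filename, and `purpose` value are illustrative assumptions, not part of this diff.

```python
# Minimal sketch: upload a document through the OpenAI-compatible Files API.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed server address

# The upload is a multipart form request: the File object itself plus its
# intended purpose (and, optionally, expiration form values).
with open("report.pdf", "rb") as f:
    uploaded = client.files.create(file=f, purpose="assistants")

print(uploaded.id)  # file id usable with other Llama Stack APIs
```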

View file

@ -1,5 +1,7 @@
---
description: "Llama Stack Inference API for generating completions, chat completions, and embeddings.
description: "Inference
Llama Stack Inference API for generating completions, chat completions, and embeddings.
This API provides the raw interface to the underlying models. Two kinds of models are supported:
- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
@ -12,7 +14,9 @@ title: Inference
## Overview
Llama Stack Inference API for generating completions, chat completions, and embeddings.
Inference
Llama Stack Inference API for generating completions, chat completions, and embeddings.
This API provides the raw interface to the underlying models. Two kinds of models are supported:
- LLM models: these models generate "raw" and "chat" (conversational) completions.
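
To make the two supported model kinds concrete, here is a minimal sketch using an OpenAI-compatible client pointed at the Inference API. The `base_url` and model ids are illustrative assumptions.

```python
# Minimal sketch: the two model kinds the Inference API distinguishes.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed server address

# LLM model: a "chat" (conversational) completion.
chat = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # assumed model id
    messages=[{"role": "user", "content": "Say hello."}],
)
print(chat.choices[0].message.content)

# Embedding model: vectors to be used for semantic search.
emb = client.embeddings.create(
    model="all-MiniLM-L6-v2",  # assumed embedding model id
    input=["Llama Stack provides a raw interface to models."],
)
print(len(emb.data[0].embedding))
```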

View file

@ -15,6 +15,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `str \| None` | No | | API key for Anthropic models |
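
As a hypothetical illustration of how the fields in the table above fit together (the actual sample configuration is not reproduced in this diff; values here are assumptions), expressed as a plain Python dict:

```python
# Illustrative Anthropic provider config; keys mirror the field table above.
anthropic_config = {
    "api_key": "${env.ANTHROPIC_API_KEY}",  # assumed env-substitution syntax
    "refresh_models": False,                # don't poll the provider for models
    "allowed_models": None,                 # None = all models are allowed
}
```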
## Sample Configuration

View file

@ -22,6 +22,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Azure API key for Azure |
| `api_base` | `<class 'pydantic.networks.HttpUrl'>` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
| `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |

View file

@ -15,6 +15,7 @@ AWS Bedrock inference provider for accessing various AI models through AWS's man
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Defaults to the environment variable AWS_ACCESS_KEY_ID. |
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Defaults to the environment variable AWS_SECRET_ACCESS_KEY. |
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Defaults to the environment variable AWS_SESSION_TOKEN. |
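
The AWS fields above fall back to standard environment variables when left unset. A small sketch of that resolution order (the helper name is hypothetical, not from this diff):

```python
# Hypothetical helper: explicit config value wins, else the documented env var.
import os

def resolve(field_value: str | None, env_var: str) -> str | None:
    """Return the explicit config value, else the environment variable."""
    return field_value if field_value is not None else os.environ.get(env_var)

access_key = resolve(None, "AWS_ACCESS_KEY_ID")  # unset field -> env var lookup
```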

View file

@ -15,6 +15,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
| `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Cerebras API Key |

View file

@ -15,6 +15,7 @@ Databricks inference provider for running models on Databricks' unified analytic
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
| `api_token` | `<class 'pydantic.types.SecretStr'>` | No | | The Databricks API token |

View file

@ -15,6 +15,7 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Fireworks.ai API Key |

View file

@ -15,6 +15,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `str \| None` | No | | API key for Gemini models |
## Sample Configuration

View file

@ -15,6 +15,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `str \| None` | No | | The Groq API key |
| `url` | `<class 'str'>` | No | https://api.groq.com | The URL for the Groq AI server |

View file

@ -15,6 +15,7 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `str \| None` | No | | The Llama API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |

View file

@ -15,6 +15,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base URL for accessing the NVIDIA NIM |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed if using the hosted service |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |

View file

@ -15,8 +15,8 @@ Ollama inference provider for running local models through the Ollama runtime.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | http://localhost:11434 | |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically |
## Sample Configuration

View file

@ -15,6 +15,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `str \| None` | No | | API key for OpenAI models |
| `base_url` | `<class 'str'>` | No | https://api.openai.com/v1 | Base URL for OpenAI API |

View file

@ -15,6 +15,7 @@ Passthrough inference provider for connecting to any external inference service
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrough endpoint |

View file

@ -15,6 +15,7 @@ RunPod inference provider for running models on RunPod's cloud GPU platform.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
| `api_token` | `str \| None` | No | | The API token |

View file

@ -15,6 +15,7 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key |

View file

@ -15,6 +15,7 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | | The URL for the TGI serving endpoint |
## Sample Configuration

View file

@ -15,6 +15,7 @@ Together AI inference provider for open-source models and collaborative AI devel
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Together AI API Key |

View file

@ -54,6 +54,7 @@ Available Models:
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `project` | `<class 'str'>` | No | | Google Cloud project ID for Vertex AI |
| `location` | `<class 'str'>` | No | us-central1 | Google Cloud location for Vertex AI |

View file

@ -15,11 +15,11 @@ Remote vLLM inference provider for connecting to vLLM servers.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
| `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. |
| `api_token` | `str \| None` | No | fake | The API token |
| `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically |
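
As a hypothetical illustration of the `tls_verify` field above, which accepts either a boolean or a path to a CA certificate file (all URLs and paths here are assumptions):

```python
# Illustrative vLLM provider configs showing the three tls_verify forms.
vllm_strict = {"url": "https://vllm.internal:8000/v1", "tls_verify": True}
vllm_custom_ca = {
    "url": "https://vllm.internal:8000/v1",
    "tls_verify": "/etc/ssl/certs/internal-ca.pem",  # path to CA certificate file
}
vllm_insecure = {"url": "http://localhost:8000/v1", "tls_verify": False}
```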
## Sample Configuration

View file

@ -15,6 +15,7 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base URL for accessing watsonx.ai |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key |
| `project_id` | `str \| None` | No | | The project ID |

View file

@ -1,4 +1,7 @@
---
description: "Safety
OpenAI-compatible Moderations API."
sidebar_label: Safety
title: Safety
---
@ -7,4 +10,8 @@ title: Safety
## Overview
Safety
OpenAI-compatible Moderations API.
This section contains documentation for all available providers for the **safety** API.
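
To make the Moderations flow concrete, here is a minimal sketch against the OpenAI-compatible endpoint the overview names. The `base_url`, `api_key`, and model id are illustrative assumptions.

```python
# Minimal sketch: classify whether an input is potentially harmful.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed server address

result = client.moderations.create(
    model="llama-guard",  # assumed shield/model id
    input="Is this text potentially harmful?",
)
print(result.results[0].flagged)
```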

View file

@ -15,6 +15,7 @@ AWS Bedrock safety provider for content moderation using AWS's safety services.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Defaults to the environment variable AWS_ACCESS_KEY_ID. |
| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Defaults to the environment variable AWS_SECRET_ACCESS_KEY. |
| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Defaults to the environment variable AWS_SESSION_TOKEN. |

View file

@ -1443,8 +1443,8 @@
"tags": [
"Inference"
],
"summary": "List all chat completions.",
"description": "List all chat completions.",
"summary": "List chat completions.",
"description": "List chat completions.",
"parameters": [
{
"name": "after",
@ -1520,8 +1520,8 @@
"tags": [
"Inference"
],
"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
"summary": "Create chat completions.",
"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
"parameters": [],
"requestBody": {
"content": {
@ -1565,8 +1565,8 @@
"tags": [
"Inference"
],
"summary": "Describe a chat completion by its ID.",
"description": "Describe a chat completion by its ID.",
"summary": "Get chat completion.",
"description": "Get chat completion.\nDescribe a chat completion by its ID.",
"parameters": [
{
"name": "completion_id",
@ -1610,8 +1610,8 @@
"tags": [
"Inference"
],
"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
"summary": "Create completion.",
"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
"parameters": [],
"requestBody": {
"content": {
@ -1655,8 +1655,8 @@
"tags": [
"Inference"
],
"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
"summary": "Create embeddings.",
"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
"parameters": [],
"requestBody": {
"content": {
@ -1700,8 +1700,8 @@
"tags": [
"Files"
],
"summary": "Returns a list of files that belong to the user's organization.",
"description": "Returns a list of files that belong to the user's organization.",
"summary": "List files.",
"description": "List files.\nReturns a list of files that belong to the user's organization.",
"parameters": [
{
"name": "after",
@ -1770,8 +1770,8 @@
"tags": [
"Files"
],
"summary": "Upload a file that can be used across various endpoints.",
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
"summary": "Upload file.",
"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
"parameters": [],
"requestBody": {
"content": {
@ -1831,8 +1831,8 @@
"tags": [
"Files"
],
"summary": "Returns information about a specific file.",
"description": "Returns information about a specific file.",
"summary": "Retrieve file.",
"description": "Retrieve file.\nReturns information about a specific file.",
"parameters": [
{
"name": "file_id",
@ -1874,8 +1874,8 @@
"tags": [
"Files"
],
"summary": "Delete a file.",
"description": "Delete a file.",
"summary": "Delete file.",
"description": "Delete file.",
"parameters": [
{
"name": "file_id",
@ -1919,8 +1919,8 @@
"tags": [
"Files"
],
"summary": "Returns the contents of the specified file.",
"description": "Returns the contents of the specified file.",
"summary": "Retrieve file content.",
"description": "Retrieve file content.\nReturns the contents of the specified file.",
"parameters": [
{
"name": "file_id",
@ -1999,8 +1999,8 @@
"tags": [
"Safety"
],
"summary": "Classifies if text and/or image inputs are potentially harmful.",
"description": "Classifies if text and/or image inputs are potentially harmful.",
"summary": "Create moderation.",
"description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
"parameters": [],
"requestBody": {
"content": {
@ -2044,8 +2044,8 @@
"tags": [
"Agents"
],
"summary": "List all OpenAI responses.",
"description": "List all OpenAI responses.",
"summary": "List all responses.",
"description": "List all responses.",
"parameters": [
{
"name": "after",
@ -2119,8 +2119,8 @@
"tags": [
"Agents"
],
"summary": "Create a new OpenAI response.",
"description": "Create a new OpenAI response.",
"summary": "Create a model response.",
"description": "Create a model response.",
"parameters": [],
"requestBody": {
"content": {
@ -2184,8 +2184,8 @@
"tags": [
"Agents"
],
"summary": "Retrieve an OpenAI response by its ID.",
"description": "Retrieve an OpenAI response by its ID.",
"summary": "Get a model response.",
"description": "Get a model response.",
"parameters": [
{
"name": "response_id",
@ -2227,8 +2227,8 @@
"tags": [
"Agents"
],
"summary": "Delete an OpenAI response by its ID.",
"description": "Delete an OpenAI response by its ID.",
"summary": "Delete a response.",
"description": "Delete a response.",
"parameters": [
{
"name": "response_id",
@ -2272,8 +2272,8 @@
"tags": [
"Agents"
],
"summary": "List input items for a given OpenAI response.",
"description": "List input items for a given OpenAI response.",
"summary": "List input items.",
"description": "List input items.",
"parameters": [
{
"name": "response_id",
@ -13366,12 +13366,13 @@
},
{
"name": "Files",
"description": ""
"description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
"x-displayName": "Files"
},
{
"name": "Inference",
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
"x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
"x-displayName": "Inference"
},
{
"name": "Models",
@ -13383,7 +13384,8 @@
},
{
"name": "Safety",
"description": ""
"description": "OpenAI-compatible Moderations API.",
"x-displayName": "Safety"
},
{
"name": "Telemetry",

View file

@ -1033,8 +1033,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: List all chat completions.
description: List all chat completions.
summary: List chat completions.
description: List chat completions.
parameters:
- name: after
in: query
@ -1087,10 +1087,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate an OpenAI-compatible chat completion for the given messages using
the specified model.
summary: Create chat completions.
description: >-
Create chat completions.
Generate an OpenAI-compatible chat completion for the given messages using
the specified model.
parameters: []
@ -1122,8 +1122,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: Describe a chat completion by its ID.
description: Describe a chat completion by its ID.
summary: Get chat completion.
description: >-
Get chat completion.
Describe a chat completion by its ID.
parameters:
- name: completion_id
in: path
@ -1153,10 +1156,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate an OpenAI-compatible completion for the given prompt using the specified
model.
summary: Create completion.
description: >-
Create completion.
Generate an OpenAI-compatible completion for the given prompt using the specified
model.
parameters: []
@ -1189,10 +1192,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate OpenAI-compatible embeddings for the given input using the specified
model.
summary: Create embeddings.
description: >-
Create embeddings.
Generate OpenAI-compatible embeddings for the given input using the specified
model.
parameters: []
@ -1225,9 +1228,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: >-
Returns a list of files that belong to the user's organization.
summary: List files.
description: >-
List files.
Returns a list of files that belong to the user's organization.
parameters:
- name: after
@ -1285,11 +1289,13 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: >-
Upload a file that can be used across various endpoints.
summary: Upload file.
description: >-
Upload file.
Upload a file that can be used across various endpoints.
The file upload should be a multipart form request with:
- file: The File object (not file name) to be uploaded.
@ -1338,9 +1344,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: >-
Returns information about a specific file.
summary: Retrieve file.
description: >-
Retrieve file.
Returns information about a specific file.
parameters:
- name: file_id
@ -1372,8 +1379,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: Delete a file.
description: Delete a file.
summary: Delete file.
description: Delete file.
parameters:
- name: file_id
in: path
@ -1405,9 +1412,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: >-
Returns the contents of the specified file.
summary: Retrieve file content.
description: >-
Retrieve file content.
Returns the contents of the specified file.
parameters:
- name: file_id
@ -1464,9 +1472,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Safety
summary: >-
Classifies if text and/or image inputs are potentially harmful.
summary: Create moderation.
description: >-
Create moderation.
Classifies if text and/or image inputs are potentially harmful.
parameters: []
requestBody:
@ -1497,8 +1506,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: List all OpenAI responses.
description: List all OpenAI responses.
summary: List all responses.
description: List all responses.
parameters:
- name: after
in: query
@ -1549,8 +1558,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: Create a new OpenAI response.
description: Create a new OpenAI response.
summary: Create a model response.
description: Create a model response.
parameters: []
requestBody:
content:
@ -1592,8 +1601,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: Retrieve an OpenAI response by its ID.
description: Retrieve an OpenAI response by its ID.
summary: Get a model response.
description: Get a model response.
parameters:
- name: response_id
in: path
@ -1623,8 +1632,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: Delete an OpenAI response by its ID.
description: Delete an OpenAI response by its ID.
summary: Delete a response.
description: Delete a response.
parameters:
- name: response_id
in: path
@ -1654,10 +1663,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: >-
List input items for a given OpenAI response.
description: >-
List input items for a given OpenAI response.
summary: List input items.
description: List input items.
parameters:
- name: response_id
in: path
@ -10011,9 +10018,16 @@ tags:
x-displayName: >-
Llama Stack Evaluation API for running evaluations on model and agent candidates.
- name: Files
description: ''
description: >-
This API is used to upload documents that can be used with other Llama Stack
APIs.
x-displayName: Files
- name: Inference
description: >-
Llama Stack Inference API for generating completions, chat completions, and
embeddings.
This API provides the raw interface to the underlying models. Two kinds of models
are supported:
@ -10021,15 +10035,14 @@ tags:
- Embedding models: these models generate embeddings to be used for semantic
search.
x-displayName: >-
Llama Stack Inference API for generating completions, chat completions, and
embeddings.
x-displayName: Inference
- name: Models
description: ''
- name: PostTraining (Coming Soon)
description: ''
- name: Safety
description: ''
description: OpenAI-compatible Moderations API.
x-displayName: Safety
- name: Telemetry
description: ''
- name: VectorIO

View file

@ -69,8 +69,8 @@
"tags": [
"Inference"
],
"summary": "List all chat completions.",
"description": "List all chat completions.",
"summary": "List chat completions.",
"description": "List chat completions.",
"parameters": [
{
"name": "after",
@ -146,8 +146,8 @@
"tags": [
"Inference"
],
"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
"summary": "Create chat completions.",
"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
"parameters": [],
"requestBody": {
"content": {
@ -191,8 +191,8 @@
"tags": [
"Inference"
],
"summary": "Describe a chat completion by its ID.",
"description": "Describe a chat completion by its ID.",
"summary": "Get chat completion.",
"description": "Get chat completion.\nDescribe a chat completion by its ID.",
"parameters": [
{
"name": "completion_id",
@ -236,8 +236,8 @@
"tags": [
"Inference"
],
"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
"summary": "Create completion.",
"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
"parameters": [],
"requestBody": {
"content": {
@ -758,8 +758,8 @@
"tags": [
"Inference"
],
"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
"summary": "Create embeddings.",
"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
"parameters": [],
"requestBody": {
"content": {
@ -803,8 +803,8 @@
"tags": [
"Files"
],
"summary": "Returns a list of files that belong to the user's organization.",
"description": "Returns a list of files that belong to the user's organization.",
"summary": "List files.",
"description": "List files.\nReturns a list of files that belong to the user's organization.",
"parameters": [
{
"name": "after",
@ -873,8 +873,8 @@
"tags": [
"Files"
],
"summary": "Upload a file that can be used across various endpoints.",
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
"summary": "Upload file.",
"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
"parameters": [],
"requestBody": {
"content": {
@ -934,8 +934,8 @@
"tags": [
"Files"
],
"summary": "Returns information about a specific file.",
"description": "Returns information about a specific file.",
"summary": "Retrieve file.",
"description": "Retrieve file.\nReturns information about a specific file.",
"parameters": [
{
"name": "file_id",
@ -977,8 +977,8 @@
"tags": [
"Files"
],
"summary": "Delete a file.",
"description": "Delete a file.",
"summary": "Delete file.",
"description": "Delete file.",
"parameters": [
{
"name": "file_id",
@ -1022,8 +1022,8 @@
"tags": [
"Files"
],
"summary": "Returns the contents of the specified file.",
"description": "Returns the contents of the specified file.",
"summary": "Retrieve file content.",
"description": "Retrieve file content.\nReturns the contents of the specified file.",
"parameters": [
{
"name": "file_id",
@ -1067,8 +1067,8 @@
"tags": [
"Inspect"
],
"summary": "Get the current health status of the service.",
"description": "Get the current health status of the service.",
"summary": "Get health status.",
"description": "Get health status.\nGet the current health status of the service.",
"parameters": [],
"deprecated": false
}
@ -1102,8 +1102,8 @@
"tags": [
"Inspect"
],
"summary": "List all available API routes with their methods and implementing providers.",
"description": "List all available API routes with their methods and implementing providers.",
"summary": "List routes.",
"description": "List routes.\nList all available API routes with their methods and implementing providers.",
"parameters": [],
"deprecated": false
}
@ -1170,8 +1170,8 @@
"tags": [
"Models"
],
"summary": "Register a model.",
"description": "Register a model.",
"summary": "Register model.",
"description": "Register model.\nRegister a model.",
"parameters": [],
"requestBody": {
"content": {
@ -1215,8 +1215,8 @@
"tags": [
"Models"
],
"summary": "Get a model by its identifier.",
"description": "Get a model by its identifier.",
"summary": "Get model.",
"description": "Get model.\nGet a model by its identifier.",
"parameters": [
{
"name": "model_id",
@ -1251,8 +1251,8 @@
"tags": [
"Models"
],
"summary": "Unregister a model.",
"description": "Unregister a model.",
"summary": "Unregister model.",
"description": "Unregister model.\nUnregister a model.",
"parameters": [
{
"name": "model_id",
@ -1296,8 +1296,8 @@
"tags": [
"Safety"
],
"summary": "Classifies if text and/or image inputs are potentially harmful.",
"description": "Classifies if text and/or image inputs are potentially harmful.",
"summary": "Create moderation.",
"description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
"parameters": [],
"requestBody": {
"content": {
@ -1374,8 +1374,8 @@
"tags": [
"Prompts"
],
"summary": "Create a new prompt.",
"description": "Create a new prompt.",
"summary": "Create prompt.",
"description": "Create prompt.\nCreate a new prompt.",
"parameters": [],
"requestBody": {
"content": {
@ -1419,8 +1419,8 @@
"tags": [
"Prompts"
],
"summary": "Get a prompt by its identifier and optional version.",
"description": "Get a prompt by its identifier and optional version.",
"summary": "Get prompt.",
"description": "Get prompt.\nGet a prompt by its identifier and optional version.",
"parameters": [
{
"name": "prompt_id",
@ -1471,8 +1471,8 @@
"tags": [
"Prompts"
],
"summary": "Update an existing prompt (increments version).",
"description": "Update an existing prompt (increments version).",
"summary": "Update prompt.",
"description": "Update prompt.\nUpdate an existing prompt (increments version).",
"parameters": [
{
"name": "prompt_id",
@ -1517,8 +1517,8 @@
"tags": [
"Prompts"
],
"summary": "Delete a prompt.",
"description": "Delete a prompt.",
"summary": "Delete prompt.",
"description": "Delete prompt.\nDelete a prompt.",
"parameters": [
{
"name": "prompt_id",
@ -1562,8 +1562,8 @@
"tags": [
"Prompts"
],
"summary": "Set which version of a prompt should be the default in get_prompt (latest).",
"description": "Set which version of a prompt should be the default in get_prompt (latest).",
"summary": "Set prompt version.",
"description": "Set prompt version.\nSet which version of a prompt should be the default in get_prompt (latest).",
"parameters": [
{
"name": "prompt_id",
@ -1617,8 +1617,8 @@
"tags": [
"Prompts"
],
"summary": "List all versions of a specific prompt.",
"description": "List all versions of a specific prompt.",
"summary": "List prompt versions.",
"description": "List prompt versions.\nList all versions of a specific prompt.",
"parameters": [
{
"name": "prompt_id",
@ -1662,8 +1662,8 @@
"tags": [
"Providers"
],
"summary": "List all available providers.",
"description": "List all available providers.",
"summary": "List providers.",
"description": "List providers.\nList all available providers.",
"parameters": [],
"deprecated": false
}
@ -1697,8 +1697,8 @@
"tags": [
"Providers"
],
"summary": "Get detailed information about a specific provider.",
"description": "Get detailed information about a specific provider.",
"summary": "Get provider.",
"description": "Get provider.\nGet detailed information about a specific provider.",
"parameters": [
{
"name": "provider_id",
@ -1742,8 +1742,8 @@
"tags": [
"Agents"
],
"summary": "List all OpenAI responses.",
"description": "List all OpenAI responses.",
"summary": "List all responses.",
"description": "List all responses.",
"parameters": [
{
"name": "after",
@ -1817,8 +1817,8 @@
"tags": [
"Agents"
],
"summary": "Create a new OpenAI response.",
"description": "Create a new OpenAI response.",
"summary": "Create a model response.",
"description": "Create a model response.",
"parameters": [],
"requestBody": {
"content": {
@ -1882,8 +1882,8 @@
"tags": [
"Agents"
],
"summary": "Retrieve an OpenAI response by its ID.",
"description": "Retrieve an OpenAI response by its ID.",
"summary": "Get a model response.",
"description": "Get a model response.",
"parameters": [
{
"name": "response_id",
@ -1925,8 +1925,8 @@
"tags": [
"Agents"
],
"summary": "Delete an OpenAI response by its ID.",
"description": "Delete an OpenAI response by its ID.",
"summary": "Delete a response.",
"description": "Delete a response.",
"parameters": [
{
"name": "response_id",
@ -1970,8 +1970,8 @@
"tags": [
"Agents"
],
"summary": "List input items for a given OpenAI response.",
"description": "List input items for a given OpenAI response.",
"summary": "List input items.",
"description": "List input items.",
"parameters": [
{
"name": "response_id",
@ -2063,8 +2063,8 @@
"tags": [
"Safety"
],
"summary": "Run a shield.",
"description": "Run a shield.",
"summary": "Run shield.",
"description": "Run shield.\nRun a shield.",
"parameters": [],
"requestBody": {
"content": {
@ -4196,8 +4196,8 @@
"tags": [
"Inspect"
],
"summary": "Get the version of the service.",
"description": "Get the version of the service.",
"summary": "Get version.",
"description": "Get version.\nGet the version of the service.",
"parameters": [],
"deprecated": false
}
@ -12914,16 +12914,18 @@
},
{
"name": "Files",
"description": ""
"description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
"x-displayName": "Files"
},
{
"name": "Inference",
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
"x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
"x-displayName": "Inference"
},
{
"name": "Inspect",
"description": ""
"description": "APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.",
"x-displayName": "Inspect"
},
{
"name": "Models",
@ -12931,17 +12933,18 @@
},
{
"name": "Prompts",
"description": "",
"x-displayName": "Protocol for prompt management operations."
"description": "Protocol for prompt management operations.",
"x-displayName": "Prompts"
},
{
"name": "Providers",
"description": "",
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
"description": "Providers API for inspecting, listing, and modifying providers and their configurations.",
"x-displayName": "Providers"
},
{
"name": "Safety",
"description": ""
"description": "OpenAI-compatible Moderations API.",
"x-displayName": "Safety"
},
{
"name": "Scoring",

View file

@ -33,8 +33,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: List all chat completions.
description: List all chat completions.
summary: List chat completions.
description: List chat completions.
parameters:
- name: after
in: query
@ -87,10 +87,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate an OpenAI-compatible chat completion for the given messages using
the specified model.
summary: Create chat completions.
description: >-
Create chat completions.
Generate an OpenAI-compatible chat completion for the given messages using
the specified model.
parameters: []
@ -122,8 +122,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: Describe a chat completion by its ID.
description: Describe a chat completion by its ID.
summary: Get chat completion.
description: >-
Get chat completion.
Describe a chat completion by its ID.
parameters:
- name: completion_id
in: path
@ -153,10 +156,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate an OpenAI-compatible completion for the given prompt using the specified
model.
summary: Create completion.
description: >-
Create completion.
Generate an OpenAI-compatible completion for the given prompt using the specified
model.
parameters: []
@ -603,10 +606,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate OpenAI-compatible embeddings for the given input using the specified
model.
summary: Create embeddings.
description: >-
Create embeddings.
Generate OpenAI-compatible embeddings for the given input using the specified
model.
parameters: []
@ -639,9 +642,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: >-
Returns a list of files that belong to the user's organization.
summary: List files.
description: >-
List files.
Returns a list of files that belong to the user's organization.
parameters:
- name: after
@ -699,11 +703,13 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: >-
Upload a file that can be used across various endpoints.
summary: Upload file.
description: >-
Upload file.
Upload a file that can be used across various endpoints.
The file upload should be a multipart form request with:
- file: The File object (not file name) to be uploaded.
@ -752,9 +758,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: >-
Returns information about a specific file.
summary: Retrieve file.
description: >-
Retrieve file.
Returns information about a specific file.
parameters:
- name: file_id
@ -786,8 +793,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: Delete a file.
description: Delete a file.
summary: Delete file.
description: Delete file.
parameters:
- name: file_id
in: path
@ -819,9 +826,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: >-
Returns the contents of the specified file.
summary: Retrieve file content.
description: >-
Retrieve file content.
Returns the contents of the specified file.
parameters:
- name: file_id
@ -854,9 +862,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inspect
summary: >-
Get the current health status of the service.
summary: Get health status.
description: >-
Get health status.
Get the current health status of the service.
parameters: []
deprecated: false
@ -882,9 +891,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inspect
summary: >-
List all available API routes with their methods and implementing providers.
summary: List routes.
description: >-
List routes.
List all available API routes with their methods and implementing providers.
parameters: []
deprecated: false
@ -933,8 +943,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: Register a model.
description: Register a model.
summary: Register model.
description: >-
Register model.
Register a model.
parameters: []
requestBody:
content:
@ -964,8 +977,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: Get a model by its identifier.
description: Get a model by its identifier.
summary: Get model.
description: >-
Get model.
Get a model by its identifier.
parameters:
- name: model_id
in: path
@ -990,8 +1006,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: Unregister a model.
description: Unregister a model.
summary: Unregister model.
description: >-
Unregister model.
Unregister a model.
parameters:
- name: model_id
in: path
@ -1022,9 +1041,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Safety
summary: >-
Classifies if text and/or image inputs are potentially harmful.
summary: Create moderation.
description: >-
Create moderation.
Classifies if text and/or image inputs are potentially harmful.
parameters: []
requestBody:
@ -1080,8 +1100,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
summary: Create a new prompt.
description: Create a new prompt.
summary: Create prompt.
description: >-
Create prompt.
Create a new prompt.
parameters: []
requestBody:
content:
@ -1111,9 +1134,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
summary: >-
Get a prompt by its identifier and optional version.
summary: Get prompt.
description: >-
Get prompt.
Get a prompt by its identifier and optional version.
parameters:
- name: prompt_id
@ -1151,9 +1175,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
summary: >-
Update an existing prompt (increments version).
summary: Update prompt.
description: >-
Update prompt.
Update an existing prompt (increments version).
parameters:
- name: prompt_id
@ -1185,8 +1210,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
summary: Delete a prompt.
description: Delete a prompt.
summary: Delete prompt.
description: >-
Delete prompt.
Delete a prompt.
parameters:
- name: prompt_id
in: path
@ -1217,9 +1245,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
summary: >-
Set which version of a prompt should be the default in get_prompt (latest).
summary: Set prompt version.
description: >-
Set prompt version.
Set which version of a prompt should be the default in get_prompt (latest).
parameters:
- name: prompt_id
@ -1257,8 +1286,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
summary: List all versions of a specific prompt.
description: List all versions of a specific prompt.
summary: List prompt versions.
description: >-
List prompt versions.
List all versions of a specific prompt.
parameters:
- name: prompt_id
in: path
@ -1290,8 +1322,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Providers
summary: List all available providers.
description: List all available providers.
summary: List providers.
description: >-
List providers.
List all available providers.
parameters: []
deprecated: false
/v1/providers/{provider_id}:
@ -1316,9 +1351,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Providers
summary: >-
Get detailed information about a specific provider.
summary: Get provider.
description: >-
Get provider.
Get detailed information about a specific provider.
parameters:
- name: provider_id
@ -1349,8 +1385,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: List all OpenAI responses.
description: List all OpenAI responses.
summary: List all responses.
description: List all responses.
parameters:
- name: after
in: query
@ -1401,8 +1437,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: Create a new OpenAI response.
description: Create a new OpenAI response.
summary: Create a model response.
description: Create a model response.
parameters: []
requestBody:
content:
@ -1444,8 +1480,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: Retrieve an OpenAI response by its ID.
description: Retrieve an OpenAI response by its ID.
summary: Get a model response.
description: Get a model response.
parameters:
- name: response_id
in: path
@ -1475,8 +1511,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: Delete an OpenAI response by its ID.
description: Delete an OpenAI response by its ID.
summary: Delete a response.
description: Delete a response.
parameters:
- name: response_id
in: path
@ -1506,10 +1542,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: >-
List input items for a given OpenAI response.
description: >-
List input items for a given OpenAI response.
summary: List input items.
description: List input items.
parameters:
- name: response_id
in: path
@ -1578,8 +1612,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Safety
summary: Run a shield.
description: Run a shield.
summary: Run shield.
description: >-
Run shield.
Run a shield.
parameters: []
requestBody:
content:
@ -3135,8 +3172,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inspect
summary: Get the version of the service.
description: Get the version of the service.
summary: Get version.
description: >-
Get version.
Get the version of the service.
parameters: []
deprecated: false
jsonSchemaDialect: >-
@ -9749,9 +9789,16 @@ tags:
x-displayName: >-
Protocol for conversation management operations.
- name: Files
description: ''
description: >-
This API is used to upload documents that can be used with other Llama Stack
APIs.
x-displayName: Files
- name: Inference
description: >-
Llama Stack Inference API for generating completions, chat completions, and
embeddings.
This API provides the raw interface to the underlying models. Two kinds of models
are supported:
@ -9759,23 +9806,25 @@ tags:
- Embedding models: these models generate embeddings to be used for semantic
search.
x-displayName: >-
Llama Stack Inference API for generating completions, chat completions, and
embeddings.
x-displayName: Inference
- name: Inspect
description: ''
description: >-
APIs for inspecting the Llama Stack service, including health status, available
API routes with methods and implementing providers.
x-displayName: Inspect
- name: Models
description: ''
- name: Prompts
description: ''
x-displayName: >-
description: >-
Protocol for prompt management operations.
x-displayName: Prompts
- name: Providers
description: ''
x-displayName: >-
description: >-
Providers API for inspecting, listing, and modifying providers and their configurations.
x-displayName: Providers
- name: Safety
description: ''
description: OpenAI-compatible Moderations API.
x-displayName: Safety
- name: Scoring
description: ''
- name: ScoringFunctions

View file

@ -69,8 +69,8 @@
"tags": [
"Inference"
],
"summary": "List all chat completions.",
"description": "List all chat completions.",
"summary": "List chat completions.",
"description": "List chat completions.",
"parameters": [
{
"name": "after",
@ -146,8 +146,8 @@
"tags": [
"Inference"
],
"summary": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
"description": "Generate an OpenAI-compatible chat completion for the given messages using the specified model.",
"summary": "Create chat completions.",
"description": "Create chat completions.\nGenerate an OpenAI-compatible chat completion for the given messages using the specified model.",
"parameters": [],
"requestBody": {
"content": {
@ -191,8 +191,8 @@
"tags": [
"Inference"
],
"summary": "Describe a chat completion by its ID.",
"description": "Describe a chat completion by its ID.",
"summary": "Get chat completion.",
"description": "Get chat completion.\nDescribe a chat completion by its ID.",
"parameters": [
{
"name": "completion_id",
@ -236,8 +236,8 @@
"tags": [
"Inference"
],
"summary": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
"description": "Generate an OpenAI-compatible completion for the given prompt using the specified model.",
"summary": "Create completion.",
"description": "Create completion.\nGenerate an OpenAI-compatible completion for the given prompt using the specified model.",
"parameters": [],
"requestBody": {
"content": {
@ -758,8 +758,8 @@
"tags": [
"Inference"
],
"summary": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
"summary": "Create embeddings.",
"description": "Create embeddings.\nGenerate OpenAI-compatible embeddings for the given input using the specified model.",
"parameters": [],
"requestBody": {
"content": {
@ -803,8 +803,8 @@
"tags": [
"Files"
],
"summary": "Returns a list of files that belong to the user's organization.",
"description": "Returns a list of files that belong to the user's organization.",
"summary": "List files.",
"description": "List files.\nReturns a list of files that belong to the user's organization.",
"parameters": [
{
"name": "after",
@ -873,8 +873,8 @@
"tags": [
"Files"
],
"summary": "Upload a file that can be used across various endpoints.",
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
"summary": "Upload file.",
"description": "Upload file.\nUpload a file that can be used across various endpoints.\n\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file.",
"parameters": [],
"requestBody": {
"content": {
@ -934,8 +934,8 @@
"tags": [
"Files"
],
"summary": "Returns information about a specific file.",
"description": "Returns information about a specific file.",
"summary": "Retrieve file.",
"description": "Retrieve file.\nReturns information about a specific file.",
"parameters": [
{
"name": "file_id",
@ -977,8 +977,8 @@
"tags": [
"Files"
],
"summary": "Delete a file.",
"description": "Delete a file.",
"summary": "Delete file.",
"description": "Delete file.",
"parameters": [
{
"name": "file_id",
@ -1022,8 +1022,8 @@
"tags": [
"Files"
],
"summary": "Returns the contents of the specified file.",
"description": "Returns the contents of the specified file.",
"summary": "Retrieve file content.",
"description": "Retrieve file content.\nReturns the contents of the specified file.",
"parameters": [
{
"name": "file_id",
@ -1067,8 +1067,8 @@
"tags": [
"Inspect"
],
"summary": "Get the current health status of the service.",
"description": "Get the current health status of the service.",
"summary": "Get health status.",
"description": "Get health status.\nGet the current health status of the service.",
"parameters": [],
"deprecated": false
}
@ -1102,8 +1102,8 @@
"tags": [
"Inspect"
],
"summary": "List all available API routes with their methods and implementing providers.",
"description": "List all available API routes with their methods and implementing providers.",
"summary": "List routes.",
"description": "List routes.\nList all available API routes with their methods and implementing providers.",
"parameters": [],
"deprecated": false
}
@ -1170,8 +1170,8 @@
"tags": [
"Models"
],
"summary": "Register a model.",
"description": "Register a model.",
"summary": "Register model.",
"description": "Register model.\nRegister a model.",
"parameters": [],
"requestBody": {
"content": {
@ -1215,8 +1215,8 @@
"tags": [
"Models"
],
"summary": "Get a model by its identifier.",
"description": "Get a model by its identifier.",
"summary": "Get model.",
"description": "Get model.\nGet a model by its identifier.",
"parameters": [
{
"name": "model_id",
@ -1251,8 +1251,8 @@
"tags": [
"Models"
],
"summary": "Unregister a model.",
"description": "Unregister a model.",
"summary": "Unregister model.",
"description": "Unregister model.\nUnregister a model.",
"parameters": [
{
"name": "model_id",
@ -1296,8 +1296,8 @@
"tags": [
"Safety"
],
"summary": "Classifies if text and/or image inputs are potentially harmful.",
"description": "Classifies if text and/or image inputs are potentially harmful.",
"summary": "Create moderation.",
"description": "Create moderation.\nClassifies if text and/or image inputs are potentially harmful.",
"parameters": [],
"requestBody": {
"content": {
@ -1374,8 +1374,8 @@
"tags": [
"Prompts"
],
"summary": "Create a new prompt.",
"description": "Create a new prompt.",
"summary": "Create prompt.",
"description": "Create prompt.\nCreate a new prompt.",
"parameters": [],
"requestBody": {
"content": {
@ -1419,8 +1419,8 @@
"tags": [
"Prompts"
],
"summary": "Get a prompt by its identifier and optional version.",
"description": "Get a prompt by its identifier and optional version.",
"summary": "Get prompt.",
"description": "Get prompt.\nGet a prompt by its identifier and optional version.",
"parameters": [
{
"name": "prompt_id",
@ -1471,8 +1471,8 @@
"tags": [
"Prompts"
],
"summary": "Update an existing prompt (increments version).",
"description": "Update an existing prompt (increments version).",
"summary": "Update prompt.",
"description": "Update prompt.\nUpdate an existing prompt (increments version).",
"parameters": [
{
"name": "prompt_id",
@ -1517,8 +1517,8 @@
"tags": [
"Prompts"
],
"summary": "Delete a prompt.",
"description": "Delete a prompt.",
"summary": "Delete prompt.",
"description": "Delete prompt.\nDelete a prompt.",
"parameters": [
{
"name": "prompt_id",
@ -1562,8 +1562,8 @@
"tags": [
"Prompts"
],
"summary": "Set which version of a prompt should be the default in get_prompt (latest).",
"description": "Set which version of a prompt should be the default in get_prompt (latest).",
"summary": "Set prompt version.",
"description": "Set prompt version.\nSet which version of a prompt should be the default in get_prompt (latest).",
"parameters": [
{
"name": "prompt_id",
@ -1617,8 +1617,8 @@
"tags": [
"Prompts"
],
"summary": "List all versions of a specific prompt.",
"description": "List all versions of a specific prompt.",
"summary": "List prompt versions.",
"description": "List prompt versions.\nList all versions of a specific prompt.",
"parameters": [
{
"name": "prompt_id",
@ -1662,8 +1662,8 @@
"tags": [
"Providers"
],
"summary": "List all available providers.",
"description": "List all available providers.",
"summary": "List providers.",
"description": "List providers.\nList all available providers.",
"parameters": [],
"deprecated": false
}
@ -1697,8 +1697,8 @@
"tags": [
"Providers"
],
"summary": "Get detailed information about a specific provider.",
"description": "Get detailed information about a specific provider.",
"summary": "Get provider.",
"description": "Get provider.\nGet detailed information about a specific provider.",
"parameters": [
{
"name": "provider_id",
@ -1742,8 +1742,8 @@
"tags": [
"Agents"
],
"summary": "List all OpenAI responses.",
"description": "List all OpenAI responses.",
"summary": "List all responses.",
"description": "List all responses.",
"parameters": [
{
"name": "after",
@ -1817,8 +1817,8 @@
"tags": [
"Agents"
],
"summary": "Create a new OpenAI response.",
"description": "Create a new OpenAI response.",
"summary": "Create a model response.",
"description": "Create a model response.",
"parameters": [],
"requestBody": {
"content": {
@ -1882,8 +1882,8 @@
"tags": [
"Agents"
],
"summary": "Retrieve an OpenAI response by its ID.",
"description": "Retrieve an OpenAI response by its ID.",
"summary": "Get a model response.",
"description": "Get a model response.",
"parameters": [
{
"name": "response_id",
@ -1925,8 +1925,8 @@
"tags": [
"Agents"
],
"summary": "Delete an OpenAI response by its ID.",
"description": "Delete an OpenAI response by its ID.",
"summary": "Delete a response.",
"description": "Delete a response.",
"parameters": [
{
"name": "response_id",
@ -1970,8 +1970,8 @@
"tags": [
"Agents"
],
"summary": "List input items for a given OpenAI response.",
"description": "List input items for a given OpenAI response.",
"summary": "List input items.",
"description": "List input items.",
"parameters": [
{
"name": "response_id",
@ -2063,8 +2063,8 @@
"tags": [
"Safety"
],
"summary": "Run a shield.",
"description": "Run a shield.",
"summary": "Run shield.",
"description": "Run shield.\nRun a shield.",
"parameters": [],
"requestBody": {
"content": {
@ -4196,8 +4196,8 @@
"tags": [
"Inspect"
],
"summary": "Get the version of the service.",
"description": "Get the version of the service.",
"summary": "Get version.",
"description": "Get version.\nGet the version of the service.",
"parameters": [],
"deprecated": false
}
@ -18487,16 +18487,18 @@
},
{
"name": "Files",
"description": ""
"description": "This API is used to upload documents that can be used with other Llama Stack APIs.",
"x-displayName": "Files"
},
{
"name": "Inference",
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
"x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
"description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
"x-displayName": "Inference"
},
{
"name": "Inspect",
"description": ""
"description": "APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.",
"x-displayName": "Inspect"
},
{
"name": "Models",
@ -18508,17 +18510,18 @@
},
{
"name": "Prompts",
"description": "",
"x-displayName": "Protocol for prompt management operations."
"description": "Protocol for prompt management operations.",
"x-displayName": "Prompts"
},
{
"name": "Providers",
"description": "",
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
"description": "Providers API for inspecting, listing, and modifying providers and their configurations.",
"x-displayName": "Providers"
},
{
"name": "Safety",
"description": ""
"description": "OpenAI-compatible Moderations API.",
"x-displayName": "Safety"
},
{
"name": "Scoring",


@ -36,8 +36,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: List all chat completions.
description: List all chat completions.
summary: List chat completions.
description: List chat completions.
parameters:
- name: after
in: query
@ -90,10 +90,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate an OpenAI-compatible chat completion for the given messages using
the specified model.
summary: Create chat completions.
description: >-
Create chat completions.
Generate an OpenAI-compatible chat completion for the given messages using
the specified model.
parameters: []
@ -125,8 +125,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: Describe a chat completion by its ID.
description: Describe a chat completion by its ID.
summary: Get chat completion.
description: >-
Get chat completion.
Describe a chat completion by its ID.
parameters:
- name: completion_id
in: path
@ -156,10 +159,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate an OpenAI-compatible completion for the given prompt using the specified
model.
summary: Create completion.
description: >-
Create completion.
Generate an OpenAI-compatible completion for the given prompt using the specified
model.
parameters: []
@ -606,10 +609,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate OpenAI-compatible embeddings for the given input using the specified
model.
summary: Create embeddings.
description: >-
Create embeddings.
Generate OpenAI-compatible embeddings for the given input using the specified
model.
parameters: []
@ -642,9 +645,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: >-
Returns a list of files that belong to the user's organization.
summary: List files.
description: >-
List files.
Returns a list of files that belong to the user's organization.
parameters:
- name: after
@ -702,11 +706,13 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: >-
Upload a file that can be used across various endpoints.
summary: Upload file.
description: >-
Upload file.
Upload a file that can be used across various endpoints.
The file upload should be a multipart form request with:
- file: The File object (not file name) to be uploaded.
@ -755,9 +761,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: >-
Returns information about a specific file.
summary: Retrieve file.
description: >-
Retrieve file.
Returns information about a specific file.
parameters:
- name: file_id
@ -789,8 +796,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: Delete a file.
description: Delete a file.
summary: Delete file.
description: Delete file.
parameters:
- name: file_id
in: path
@ -822,9 +829,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Files
summary: >-
Returns the contents of the specified file.
summary: Retrieve file content.
description: >-
Retrieve file content.
Returns the contents of the specified file.
parameters:
- name: file_id
@ -857,9 +865,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inspect
summary: >-
Get the current health status of the service.
summary: Get health status.
description: >-
Get health status.
Get the current health status of the service.
parameters: []
deprecated: false
@ -885,9 +894,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inspect
summary: >-
List all available API routes with their methods and implementing providers.
summary: List routes.
description: >-
List routes.
List all available API routes with their methods and implementing providers.
parameters: []
deprecated: false
@ -936,8 +946,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: Register a model.
description: Register a model.
summary: Register model.
description: >-
Register model.
Register a model.
parameters: []
requestBody:
content:
@ -967,8 +980,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: Get a model by its identifier.
description: Get a model by its identifier.
summary: Get model.
description: >-
Get model.
Get a model by its identifier.
parameters:
- name: model_id
in: path
@ -993,8 +1009,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: Unregister a model.
description: Unregister a model.
summary: Unregister model.
description: >-
Unregister model.
Unregister a model.
parameters:
- name: model_id
in: path
@ -1025,9 +1044,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Safety
summary: >-
Classifies if text and/or image inputs are potentially harmful.
summary: Create moderation.
description: >-
Create moderation.
Classifies if text and/or image inputs are potentially harmful.
parameters: []
requestBody:
@ -1083,8 +1103,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
summary: Create a new prompt.
description: Create a new prompt.
summary: Create prompt.
description: >-
Create prompt.
Create a new prompt.
parameters: []
requestBody:
content:
@ -1114,9 +1137,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
summary: >-
Get a prompt by its identifier and optional version.
summary: Get prompt.
description: >-
Get prompt.
Get a prompt by its identifier and optional version.
parameters:
- name: prompt_id
@ -1154,9 +1178,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
summary: >-
Update an existing prompt (increments version).
summary: Update prompt.
description: >-
Update prompt.
Update an existing prompt (increments version).
parameters:
- name: prompt_id
@ -1188,8 +1213,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
summary: Delete a prompt.
description: Delete a prompt.
summary: Delete prompt.
description: >-
Delete prompt.
Delete a prompt.
parameters:
- name: prompt_id
in: path
@ -1220,9 +1248,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
summary: >-
Set which version of a prompt should be the default in get_prompt (latest).
summary: Set prompt version.
description: >-
Set prompt version.
Set which version of a prompt should be the default in get_prompt (latest).
parameters:
- name: prompt_id
@ -1260,8 +1289,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
summary: List all versions of a specific prompt.
description: List all versions of a specific prompt.
summary: List prompt versions.
description: >-
List prompt versions.
List all versions of a specific prompt.
parameters:
- name: prompt_id
in: path
@ -1293,8 +1325,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Providers
summary: List all available providers.
description: List all available providers.
summary: List providers.
description: >-
List providers.
List all available providers.
parameters: []
deprecated: false
/v1/providers/{provider_id}:
@ -1319,9 +1354,10 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Providers
summary: >-
Get detailed information about a specific provider.
summary: Get provider.
description: >-
Get provider.
Get detailed information about a specific provider.
parameters:
- name: provider_id
@ -1352,8 +1388,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: List all OpenAI responses.
description: List all OpenAI responses.
summary: List all responses.
description: List all responses.
parameters:
- name: after
in: query
@ -1404,8 +1440,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: Create a new OpenAI response.
description: Create a new OpenAI response.
summary: Create a model response.
description: Create a model response.
parameters: []
requestBody:
content:
@ -1447,8 +1483,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: Retrieve an OpenAI response by its ID.
description: Retrieve an OpenAI response by its ID.
summary: Get a model response.
description: Get a model response.
parameters:
- name: response_id
in: path
@ -1478,8 +1514,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: Delete an OpenAI response by its ID.
description: Delete an OpenAI response by its ID.
summary: Delete a response.
description: Delete a response.
parameters:
- name: response_id
in: path
@ -1509,10 +1545,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
summary: >-
List input items for a given OpenAI response.
description: >-
List input items for a given OpenAI response.
summary: List input items.
description: List input items.
parameters:
- name: response_id
in: path
@ -1581,8 +1615,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Safety
summary: Run a shield.
description: Run a shield.
summary: Run shield.
description: >-
Run shield.
Run a shield.
parameters: []
requestBody:
content:
@ -3138,8 +3175,11 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Inspect
summary: Get the version of the service.
description: Get the version of the service.
summary: Get version.
description: >-
Get version.
Get the version of the service.
parameters: []
deprecated: false
/v1beta/datasetio/append-rows/{dataset_id}:
@ -13795,9 +13835,16 @@ tags:
x-displayName: >-
Llama Stack Evaluation API for running evaluations on model and agent candidates.
- name: Files
description: ''
description: >-
This API is used to upload documents that can be used with other Llama Stack
APIs.
x-displayName: Files
- name: Inference
description: >-
Llama Stack Inference API for generating completions, chat completions, and
embeddings.
This API provides the raw interface to the underlying models. Two kinds of models
are supported:
@ -13805,25 +13852,27 @@ tags:
- Embedding models: these models generate embeddings to be used for semantic
search.
x-displayName: >-
Llama Stack Inference API for generating completions, chat completions, and
embeddings.
x-displayName: Inference
- name: Inspect
description: ''
description: >-
APIs for inspecting the Llama Stack service, including its health status and
the available API routes with their methods and implementing providers.
x-displayName: Inspect
- name: Models
description: ''
- name: PostTraining (Coming Soon)
description: ''
- name: Prompts
description: ''
x-displayName: >-
description: >-
Protocol for prompt management operations.
x-displayName: Prompts
- name: Providers
description: ''
x-displayName: >-
description: >-
Providers API for inspecting, listing, and modifying providers and their configurations.
x-displayName: Providers
- name: Safety
description: ''
description: OpenAI-compatible Moderations API.
x-displayName: Safety
- name: Scoring
description: ''
- name: ScoringFunctions
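
The pattern running through these spec changes is consistent: every operation gets a terse imperative `summary` ("Create embeddings.", "List files.") and a `description` whose first line repeats the summary before the longer explanation. If the spec is generated from the Python docstrings changed later in this commit, the split presumably works along these lines (a minimal sketch; the helper name and whitespace handling are assumptions, not the project's actual generator):

```python
def split_docstring(doc: str) -> tuple[str, str]:
    """Derive an OpenAPI (summary, description) pair from a docstring.

    The first line becomes the summary; the description restates the
    summary and then appends the remaining detail, matching the
    "Create embeddings.\nGenerate OpenAI-compatible..." pattern above.
    """
    lines = [line.strip() for line in doc.strip().splitlines()]
    summary = lines[0]
    detail = " ".join(line for line in lines[1:] if line)
    description = f"{summary}\n{detail}" if detail else summary
    return summary, description


summary, description = split_docstring(
    """Create embeddings.

    Generate OpenAI-compatible embeddings for the given input using the specified model.
    """
)
assert summary == "Create embeddings."
assert description.startswith("Create embeddings.\nGenerate")
```
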


@ -797,7 +797,7 @@ class Agents(Protocol):
self,
response_id: str,
) -> OpenAIResponseObject:
"""Retrieve an OpenAI response by its ID.
"""Get a model response.
:param response_id: The ID of the OpenAI response to retrieve.
:returns: An OpenAIResponseObject.
@ -826,7 +826,7 @@ class Agents(Protocol):
),
] = None,
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
"""Create a new OpenAI response.
"""Create a model response.
:param input: Input message(s) to create the response.
:param model: The underlying LLM used for completions.
@ -846,7 +846,7 @@ class Agents(Protocol):
model: str | None = None,
order: Order | None = Order.desc,
) -> ListOpenAIResponseObject:
"""List all OpenAI responses.
"""List all responses.
:param after: The ID of the last response to return.
:param limit: The number of responses to return.
@ -869,7 +869,7 @@ class Agents(Protocol):
limit: int | None = 20,
order: Order | None = Order.desc,
) -> ListOpenAIResponseInputItem:
"""List input items for a given OpenAI response.
"""List input items.
:param response_id: The ID of the response to retrieve input items for.
:param after: An item ID to list items after, used for pagination.
@ -884,7 +884,7 @@ class Agents(Protocol):
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
"""Delete an OpenAI response by its ID.
"""Delete a response.
:param response_id: The ID of the OpenAI response to delete.
:returns: An OpenAIDeleteResponseObject
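
Read together, the renamed docstrings make the Responses surface mirror the OpenAI client shape. A hypothetical round trip could look like the following; the create/get/list method names and the `id` attribute are inferred from the routes and docstrings above, not verified against the full protocol:

```python
# Hypothetical usage of the Agents responses surface (names assumed).
response = await agents.create_openai_response(
    input="Write a haiku about llamas.",
    model="meta-llama/Llama-3.1-8B-Instruct",
)
fetched = await agents.get_openai_response(response_id=response.id)
items = await agents.list_openai_response_input_items(response_id=response.id)
await agents.delete_openai_response(response_id=response.id)
```
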


@ -104,6 +104,11 @@ class OpenAIFileDeleteResponse(BaseModel):
@runtime_checkable
@trace_protocol
class Files(Protocol):
"""Files
This API is used to upload documents that can be used with other Llama Stack APIs.
"""
# OpenAI Files API Endpoints
@webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
@ -113,7 +118,8 @@ class Files(Protocol):
purpose: Annotated[OpenAIFilePurpose, Form()],
expires_after: Annotated[ExpiresAfter | None, Form()] = None,
) -> OpenAIFileObject:
"""
"""Upload file.
Upload a file that can be used across various endpoints.
The file upload should be a multipart form request with:
@ -137,7 +143,8 @@ class Files(Protocol):
order: Order | None = Order.desc,
purpose: OpenAIFilePurpose | None = None,
) -> ListOpenAIFileResponse:
"""
"""List files.
Returns a list of files that belong to the user's organization.
:param after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
@ -154,7 +161,8 @@ class Files(Protocol):
self,
file_id: str,
) -> OpenAIFileObject:
"""
"""Retrieve file.
Returns information about a specific file.
:param file_id: The ID of the file to use for this request.
@ -168,8 +176,7 @@ class Files(Protocol):
self,
file_id: str,
) -> OpenAIFileDeleteResponse:
"""
Delete a file.
"""Delete file.
:param file_id: The ID of the file to use for this request.
:returns: An OpenAIFileDeleteResponse indicating successful deletion.
@ -182,7 +189,8 @@ class Files(Protocol):
self,
file_id: str,
) -> Response:
"""
"""Retrieve file content.
Returns the contents of the specified file.
:param file_id: The ID of the file to use for this request.


@ -1053,7 +1053,9 @@ class InferenceProvider(Protocol):
# for fill-in-the-middle type completion
suffix: str | None = None,
) -> OpenAICompletion:
"""Generate an OpenAI-compatible completion for the given prompt using the specified model.
"""Create completion.
Generate an OpenAI-compatible completion for the given prompt using the specified model.
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
:param prompt: The prompt to generate a completion for.
@ -1105,7 +1107,9 @@ class InferenceProvider(Protocol):
top_p: float | None = None,
user: str | None = None,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
"""Generate an OpenAI-compatible chat completion for the given messages using the specified model.
"""Create chat completions.
Generate an OpenAI-compatible chat completion for the given messages using the specified model.
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
:param messages: List of messages in the conversation.
@ -1144,7 +1148,9 @@ class InferenceProvider(Protocol):
dimensions: int | None = None,
user: str | None = None,
) -> OpenAIEmbeddingsResponse:
"""Generate OpenAI-compatible embeddings for the given input using the specified model.
"""Create embeddings.
Generate OpenAI-compatible embeddings for the given input using the specified model.
:param model: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
:param input: Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings.
@ -1157,7 +1163,9 @@ class InferenceProvider(Protocol):
class Inference(InferenceProvider):
"""Llama Stack Inference API for generating completions, chat completions, and embeddings.
"""Inference
Llama Stack Inference API for generating completions, chat completions, and embeddings.
This API provides the raw interface to the underlying models. Two kinds of models are supported:
- LLM models: these models generate "raw" and "chat" (conversational) completions.
@ -1173,7 +1181,7 @@ class Inference(InferenceProvider):
model: str | None = None,
order: Order | None = Order.desc,
) -> ListOpenAIChatCompletionResponse:
"""List all chat completions.
"""List chat completions.
:param after: The ID of the last chat completion to return.
:param limit: The maximum number of chat completions to return.
@ -1188,7 +1196,9 @@ class Inference(InferenceProvider):
)
@webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
"""Describe a chat completion by its ID.
"""Get chat completion.
Describe a chat completion by its ID.
:param completion_id: ID of the chat completion.
:returns: A OpenAICompletionWithInputMessages.


@ -58,9 +58,16 @@ class ListRoutesResponse(BaseModel):
@runtime_checkable
class Inspect(Protocol):
"""Inspect
APIs for inspecting the Llama Stack service, including its health status and the available API routes with their methods and implementing providers.
"""
@webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
async def list_routes(self) -> ListRoutesResponse:
"""List all available API routes with their methods and implementing providers.
"""List routes.
List all available API routes with their methods and implementing providers.
:returns: Response containing information about all available routes.
"""
@ -68,7 +75,9 @@ class Inspect(Protocol):
@webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
async def health(self) -> HealthInfo:
"""Get the current health status of the service.
"""Get health status.
Get the current health status of the service.
:returns: Health information indicating if the service is operational.
"""
@ -76,7 +85,9 @@ class Inspect(Protocol):
@webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
async def version(self) -> VersionInfo:
"""Get the version of the service.
"""Get version.
Get the version of the service.
:returns: Version information containing the service version number.
"""


@ -124,7 +124,9 @@ class Models(Protocol):
self,
model_id: str,
) -> Model:
"""Get a model by its identifier.
"""Get model.
Get a model by its identifier.
:param model_id: The identifier of the model to get.
:returns: A Model.
@ -140,7 +142,9 @@ class Models(Protocol):
metadata: dict[str, Any] | None = None,
model_type: ModelType | None = None,
) -> Model:
"""Register a model.
"""Register model.
Register a model.
:param model_id: The identifier of the model to register.
:param provider_model_id: The identifier of the model in the provider.
@ -156,7 +160,9 @@ class Models(Protocol):
self,
model_id: str,
) -> None:
"""Unregister a model.
"""Unregister model.
Unregister a model.
:param model_id: The identifier of the model to unregister.
"""


@ -94,7 +94,9 @@ class ListPromptsResponse(BaseModel):
@runtime_checkable
@trace_protocol
class Prompts(Protocol):
"""Protocol for prompt management operations."""
"""Prompts
Protocol for prompt management operations."""
@webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
async def list_prompts(self) -> ListPromptsResponse:
@ -109,7 +111,9 @@ class Prompts(Protocol):
self,
prompt_id: str,
) -> ListPromptsResponse:
"""List all versions of a specific prompt.
"""List prompt versions.
List all versions of a specific prompt.
:param prompt_id: The identifier of the prompt to list versions for.
:returns: A ListPromptsResponse containing all versions of the prompt.
@ -122,7 +126,9 @@ class Prompts(Protocol):
prompt_id: str,
version: int | None = None,
) -> Prompt:
"""Get a prompt by its identifier and optional version.
"""Get prompt.
Get a prompt by its identifier and optional version.
:param prompt_id: The identifier of the prompt to get.
:param version: The version of the prompt to get (defaults to latest).
@ -136,7 +142,9 @@ class Prompts(Protocol):
prompt: str,
variables: list[str] | None = None,
) -> Prompt:
"""Create a new prompt.
"""Create prompt.
Create a new prompt.
:param prompt: The prompt text content with variable placeholders.
:param variables: List of variable names that can be used in the prompt template.
@ -153,7 +161,9 @@ class Prompts(Protocol):
variables: list[str] | None = None,
set_as_default: bool = True,
) -> Prompt:
"""Update an existing prompt (increments version).
"""Update prompt.
Update an existing prompt (increments version).
:param prompt_id: The identifier of the prompt to update.
:param prompt: The updated prompt text content.
@ -169,7 +179,9 @@ class Prompts(Protocol):
self,
prompt_id: str,
) -> None:
"""Delete a prompt.
"""Delete prompt.
Delete a prompt.
:param prompt_id: The identifier of the prompt to delete.
"""
@ -181,7 +193,9 @@ class Prompts(Protocol):
prompt_id: str,
version: int,
) -> Prompt:
"""Set which version of a prompt should be the default in get_prompt (latest).
"""Set prompt version.
Set which version of a prompt should be the default in get_prompt (latest).
:param prompt_id: The identifier of the prompt.
:param version: The version to set as default.
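
The Prompts protocol reads as a small versioned store. A sketch of the lifecycle; the `prompts` handle, the `prompt_id` attribute on the returned Prompt, and the `set_default_version` method name are assumptions inferred from the docstrings above:

```python
# Sketch of the prompt lifecycle documented above (names assumed).
prompt = await prompts.create_prompt(
    prompt="Summarize {{ document }} in one sentence.",
    variables=["document"],
)
latest = await prompts.get_prompt(prompt_id=prompt.prompt_id)
versions = await prompts.list_prompt_versions(prompt_id=prompt.prompt_id)
await prompts.set_default_version(prompt_id=prompt.prompt_id, version=1)
```
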


@ -42,13 +42,16 @@ class ListProvidersResponse(BaseModel):
@runtime_checkable
class Providers(Protocol):
"""
"""Providers
Providers API for inspecting, listing, and modifying providers and their configurations.
"""
@webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
async def list_providers(self) -> ListProvidersResponse:
"""List all available providers.
"""List providers.
List all available providers.
:returns: A ListProvidersResponse containing information about all providers.
"""
@ -56,7 +59,9 @@ class Providers(Protocol):
@webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
async def inspect_provider(self, provider_id: str) -> ProviderInfo:
"""Get detailed information about a specific provider.
"""Get provider.
Get detailed information about a specific provider.
:param provider_id: The ID of the provider to inspect.
:returns: A ProviderInfo object containing the provider's details.


@ -96,6 +96,11 @@ class ShieldStore(Protocol):
@runtime_checkable
@trace_protocol
class Safety(Protocol):
"""Safety
OpenAI-compatible Moderations API.
"""
shield_store: ShieldStore
@webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
@ -105,7 +110,9 @@ class Safety(Protocol):
messages: list[Message],
params: dict[str, Any],
) -> RunShieldResponse:
"""Run a shield.
"""Run shield.
Run a shield.
:param shield_id: The identifier of the shield to run.
:param messages: The messages to run the shield on.
@ -117,7 +124,9 @@ class Safety(Protocol):
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
"""Classifies if text and/or image inputs are potentially harmful.
"""Create moderation.
Classifies if text and/or image inputs are potentially harmful.
:param input: Input (or inputs) to classify.
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
:param model: The content moderation model you would like to use.
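
The Safety protocol now exposes both the legacy shield route and the OpenAI-compatible moderations route. A sketch of the two entry points; the shield and model identifiers are invented, and `UserMessage` is assumed to be the message type in scope:

```python
# Sketch: the two Safety entry points documented above (IDs invented).
shield_result = await safety.run_shield(
    shield_id="llama-guard",
    messages=[UserMessage(content="Is this safe?")],
    params={},
)
moderation = await safety.run_moderation(
    input="Is this text harmful?",
    model="llama-guard-3-8b",
)
```
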


@ -75,39 +75,6 @@ class StackRun(Subcommand):
help="Start the UI server",
)
def _resolve_config_and_distro(self, args: argparse.Namespace) -> tuple[Path | None, str | None]:
"""Resolve config file path and distribution name from args.config"""
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
if not args.config:
return None, None
config_file = Path(args.config)
has_yaml_suffix = args.config.endswith(".yaml")
distro_name = None
if not config_file.exists() and not has_yaml_suffix:
# check if this is a distribution
config_file = Path(REPO_ROOT) / "llama_stack" / "distributions" / args.config / "run.yaml"
if config_file.exists():
distro_name = args.config
if not config_file.exists() and not has_yaml_suffix:
# check if it's a build config saved to ~/.llama dir
config_file = Path(DISTRIBS_BASE_DIR / f"llamastack-{args.config}" / f"{args.config}-run.yaml")
if not config_file.exists():
self.parser.error(
f"File {str(config_file)} does not exist.\n\nPlease run `llama stack build` to generate (and optionally edit) a run.yaml file"
)
if not config_file.is_file():
self.parser.error(
f"Config file must be a valid file path, '{config_file}' is not a file: type={type(config_file)}"
)
return config_file, distro_name
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
import yaml


@ -33,7 +33,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
try:
models = await provider.list_models()
except Exception as e:
logger.warning(f"Model refresh failed for provider {provider_id}: {e}")
logger.debug(f"Model refresh failed for provider {provider_id}: {e}")
continue
self.listed_providers.add(provider_id)


@ -245,3 +245,65 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
vector_store_id=vector_store_id,
file_id=file_id,
)
async def openai_create_vector_store_file_batch(
self,
vector_store_id: str,
file_ids: list[str],
attributes: dict[str, Any] | None = None,
chunking_strategy: Any | None = None,
):
await self.assert_action_allowed("update", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_create_vector_store_file_batch(
vector_store_id=vector_store_id,
file_ids=file_ids,
attributes=attributes,
chunking_strategy=chunking_strategy,
)
async def openai_retrieve_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
)
async def openai_list_files_in_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
after: str | None = None,
before: str | None = None,
filter: str | None = None,
limit: int | None = 20,
order: str | None = "desc",
):
await self.assert_action_allowed("read", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
after=after,
before=before,
filter=filter,
limit=limit,
order=order,
)
async def openai_cancel_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("update", "vector_db", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_cancel_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
)
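
These four routing-table methods gate every file-batch operation behind the same `assert_action_allowed` check before delegating to the provider. End to end, the lifecycle they forward looks roughly like this (a sketch; the `routing_table` handle, the store/file IDs, and the `id` attribute on the returned batch are invented):

```python
# Sketch of the file-batch lifecycle routed above.
batch = await routing_table.openai_create_vector_store_file_batch(
    vector_store_id="vs_123",
    file_ids=["file-a", "file-b"],
)
status = await routing_table.openai_retrieve_vector_store_file_batch(
    batch_id=batch.id, vector_store_id="vs_123"
)
files = await routing_table.openai_list_files_in_vector_store_file_batch(
    batch_id=batch.id, vector_store_id="vs_123", limit=10
)
await routing_table.openai_cancel_vector_store_file_batch(
    batch_id=batch.id, vector_store_id="vs_123"
)
```
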


@ -9,7 +9,7 @@ from pathlib import Path
from llama_stack.log import get_logger
logger = get_logger(__name__, "tokenizer_utils")
logger = get_logger(__name__, "models")
def load_bpe_file(model_path: Path) -> dict[bytes, int]:


@ -22,6 +22,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap
deps[Api.tool_runtime],
deps[Api.tool_groups],
policy,
Api.telemetry in deps,
)
await impl.initialize()
return impl


@ -110,6 +110,7 @@ class ChatAgent(ShieldRunnerMixin):
persistence_store: KVStore,
created_at: str,
policy: list[AccessRule],
telemetry_enabled: bool = False,
):
self.agent_id = agent_id
self.agent_config = agent_config
@ -120,6 +121,7 @@ class ChatAgent(ShieldRunnerMixin):
self.tool_runtime_api = tool_runtime_api
self.tool_groups_api = tool_groups_api
self.created_at = created_at
self.telemetry_enabled = telemetry_enabled
ShieldRunnerMixin.__init__(
self,
@ -188,28 +190,30 @@ class ChatAgent(ShieldRunnerMixin):
async def create_and_execute_turn(self, request: AgentTurnCreateRequest) -> AsyncGenerator:
turn_id = str(uuid.uuid4())
span = tracing.get_current_span()
if span:
span.set_attribute("session_id", request.session_id)
span.set_attribute("agent_id", self.agent_id)
span.set_attribute("request", request.model_dump_json())
span.set_attribute("turn_id", turn_id)
if self.agent_config.name:
span.set_attribute("agent_name", self.agent_config.name)
if self.telemetry_enabled:
span = tracing.get_current_span()
if span is not None:
span.set_attribute("session_id", request.session_id)
span.set_attribute("agent_id", self.agent_id)
span.set_attribute("request", request.model_dump_json())
span.set_attribute("turn_id", turn_id)
if self.agent_config.name:
span.set_attribute("agent_name", self.agent_config.name)
await self._initialize_tools(request.toolgroups)
async for chunk in self._run_turn(request, turn_id):
yield chunk
async def resume_turn(self, request: AgentTurnResumeRequest) -> AsyncGenerator:
span = tracing.get_current_span()
if span:
span.set_attribute("agent_id", self.agent_id)
span.set_attribute("session_id", request.session_id)
span.set_attribute("request", request.model_dump_json())
span.set_attribute("turn_id", request.turn_id)
if self.agent_config.name:
span.set_attribute("agent_name", self.agent_config.name)
if self.telemetry_enabled:
span = tracing.get_current_span()
if span is not None:
span.set_attribute("agent_id", self.agent_id)
span.set_attribute("session_id", request.session_id)
span.set_attribute("request", request.model_dump_json())
span.set_attribute("turn_id", request.turn_id)
if self.agent_config.name:
span.set_attribute("agent_name", self.agent_config.name)
await self._initialize_tools()
async for chunk in self._run_turn(request):
@ -395,9 +399,12 @@ class ChatAgent(ShieldRunnerMixin):
touchpoint: str,
) -> AsyncGenerator:
async with tracing.span("run_shields") as span:
span.set_attribute("input", [m.model_dump_json() for m in messages])
if self.telemetry_enabled and span is not None:
span.set_attribute("input", [m.model_dump_json() for m in messages])
if len(shields) == 0:
span.set_attribute("output", "no shields")
if len(shields) == 0:
span.set_attribute("output", "no shields")
return
step_id = str(uuid.uuid4())
@ -430,7 +437,8 @@ class ChatAgent(ShieldRunnerMixin):
)
)
)
span.set_attribute("output", e.violation.model_dump_json())
if self.telemetry_enabled and span is not None:
span.set_attribute("output", e.violation.model_dump_json())
yield CompletionMessage(
content=str(e),
@ -453,7 +461,8 @@ class ChatAgent(ShieldRunnerMixin):
)
)
)
span.set_attribute("output", "no violations")
if self.telemetry_enabled and span is not None:
span.set_attribute("output", "no violations")
async def _run(
self,
@ -518,8 +527,9 @@ class ChatAgent(ShieldRunnerMixin):
stop_reason: StopReason | None = None
async with tracing.span("inference") as span:
if self.agent_config.name:
span.set_attribute("agent_name", self.agent_config.name)
if self.telemetry_enabled and span is not None:
if self.agent_config.name:
span.set_attribute("agent_name", self.agent_config.name)
def _serialize_nested(value):
"""Recursively serialize nested Pydantic models to dicts."""
@ -637,18 +647,19 @@ class ChatAgent(ShieldRunnerMixin):
else:
raise ValueError(f"Unexpected delta type {type(delta)}")
span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn)
span.set_attribute(
"input",
json.dumps([json.loads(m.model_dump_json()) for m in input_messages]),
)
output_attr = json.dumps(
{
"content": content,
"tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
}
)
span.set_attribute("output", output_attr)
if self.telemetry_enabled and span is not None:
span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn)
span.set_attribute(
"input",
json.dumps([json.loads(m.model_dump_json()) for m in input_messages]),
)
output_attr = json.dumps(
{
"content": content,
"tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
}
)
span.set_attribute("output", output_attr)
n_iter += 1
await self.storage.set_num_infer_iters_in_turn(session_id, turn_id, n_iter)
@ -756,7 +767,9 @@ class ChatAgent(ShieldRunnerMixin):
{
"tool_name": tool_call.tool_name,
"input": message.model_dump_json(),
},
}
if self.telemetry_enabled
else {},
) as span:
tool_execution_start_time = datetime.now(UTC).isoformat()
tool_result = await self.execute_tool_call_maybe(
@ -771,7 +784,8 @@ class ChatAgent(ShieldRunnerMixin):
call_id=tool_call.call_id,
content=tool_result.content,
)
span.set_attribute("output", result_message.model_dump_json())
if self.telemetry_enabled and span is not None:
span.set_attribute("output", result_message.model_dump_json())
# Store tool execution step
tool_execution_step = ToolExecutionStep(
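
Every span write in this file now repeats the same `self.telemetry_enabled and span is not None` guard. If the repetition grows, it could be factored into a tiny helper on the agent; a sketch, not part of this patch:

```python
def _set_span_attr(self, span, key, value) -> None:
    """Set a span attribute only when telemetry is enabled and a span exists."""
    if self.telemetry_enabled and span is not None:
        span.set_attribute(key, value)
```
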


@ -64,6 +64,7 @@ class MetaReferenceAgentsImpl(Agents):
tool_runtime_api: ToolRuntime,
tool_groups_api: ToolGroups,
policy: list[AccessRule],
telemetry_enabled: bool = False,
):
self.config = config
self.inference_api = inference_api
@ -71,6 +72,7 @@ class MetaReferenceAgentsImpl(Agents):
self.safety_api = safety_api
self.tool_runtime_api = tool_runtime_api
self.tool_groups_api = tool_groups_api
self.telemetry_enabled = telemetry_enabled
self.in_memory_store = InmemoryKVStoreImpl()
self.openai_responses_impl: OpenAIResponsesImpl | None = None
@ -135,6 +137,7 @@ class MetaReferenceAgentsImpl(Agents):
),
created_at=agent_info.created_at,
policy=self.policy,
telemetry_enabled=self.telemetry_enabled,
)
async def create_agent_session(


@ -97,6 +97,8 @@ class StreamingResponseOrchestrator:
self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = {}
# Track final messages after all tool executions
self.final_messages: list[OpenAIMessageParam] = []
# Mapping of file_id -> filename, used to resolve citation annotations
self.citation_files: dict[str, str] = {}
async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
# Initialize output messages
@ -126,6 +128,7 @@ class StreamingResponseOrchestrator:
# Text is the default response format for chat completion so don't need to pass it
# (some providers don't support non-empty response_format when tools are present)
response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
logger.debug(f"calling openai_chat_completion with tools: {self.ctx.chat_tools}")
completion_result = await self.inference_api.openai_chat_completion(
model=self.ctx.model,
messages=messages,
@ -160,7 +163,7 @@ class StreamingResponseOrchestrator:
# Handle choices with no tool calls
for choice in current_response.choices:
if not (choice.message.tool_calls and self.ctx.response_tools):
output_messages.append(await convert_chat_choice_to_response_message(choice))
output_messages.append(await convert_chat_choice_to_response_message(choice, self.citation_files))
# Execute tool calls and coordinate results
async for stream_event in self._coordinate_tool_execution(
@ -211,6 +214,8 @@ class StreamingResponseOrchestrator:
for choice in current_response.choices:
next_turn_messages.append(choice.message)
logger.debug(f"Choice message content: {choice.message.content}")
logger.debug(f"Choice message tool_calls: {choice.message.tool_calls}")
if choice.message.tool_calls and self.ctx.response_tools:
for tool_call in choice.message.tool_calls:
@ -470,6 +475,8 @@ class StreamingResponseOrchestrator:
tool_call_log = result.final_output_message
tool_response_message = result.final_input_message
self.sequence_number = result.sequence_number
if result.citation_files:
self.citation_files.update(result.citation_files)
if tool_call_log:
output_messages.append(tool_call_log)


@ -94,7 +94,10 @@ class ToolExecutor:
# Yield the final result
yield ToolExecutionResult(
sequence_number=sequence_number, final_output_message=output_message, final_input_message=input_message
sequence_number=sequence_number,
final_output_message=output_message,
final_input_message=input_message,
citation_files=result.metadata.get("citation_files") if result and result.metadata else None,
)
async def _execute_knowledge_search_via_vector_store(
@ -129,8 +132,6 @@ class ToolExecutor:
for results in all_results:
search_results.extend(results)
# Convert search results to tool result format matching memory.py
# Format the results as interleaved content similar to memory.py
content_items = []
content_items.append(
TextContentItem(
@ -138,27 +139,58 @@ class ToolExecutor:
)
)
unique_files = set()
for i, result_item in enumerate(search_results):
chunk_text = result_item.content[0].text if result_item.content else ""
metadata_text = f"document_id: {result_item.file_id}, score: {result_item.score}"
# Get file_id from attributes if result_item.file_id is empty
file_id = result_item.file_id or (
result_item.attributes.get("document_id") if result_item.attributes else None
)
metadata_text = f"document_id: {file_id}, score: {result_item.score}"
if result_item.attributes:
metadata_text += f", attributes: {result_item.attributes}"
text_content = f"[{i + 1}] {metadata_text}\n{chunk_text}\n"
text_content = f"[{i + 1}] {metadata_text} (cite as <|{file_id}|>)\n{chunk_text}\n"
content_items.append(TextContentItem(text=text_content))
unique_files.add(file_id)
content_items.append(TextContentItem(text="END of knowledge_search tool results.\n"))
citation_instruction = ""
if unique_files:
citation_instruction = (
" Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). "
"Do not add extra punctuation. Use only the file IDs provided (do not invent new ones)."
)
content_items.append(
TextContentItem(
text=f'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.\n',
text=f'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{citation_instruction}\n',
)
)
# Handle results from older versions that are missing file_id/filename attributes
citation_files = {}
for result in search_results:
file_id = result.file_id
if not file_id and result.attributes:
file_id = result.attributes.get("document_id")
filename = result.filename
if not filename and result.attributes:
filename = result.attributes.get("filename")
if not filename:
filename = "unknown"
citation_files[file_id] = filename
return ToolInvocationResult(
content=content_items,
metadata={
"document_ids": [r.file_id for r in search_results],
"chunks": [r.content[0].text if r.content else "" for r in search_results],
"scores": [r.score for r in search_results],
"citation_files": citation_files,
},
)
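
The returned metadata now carries `citation_files` alongside the existing fields. For two hypothetical results it would take roughly this shape (all values invented):

```python
# Illustrative shape of the ToolInvocationResult metadata built above.
metadata = {
    "document_ids": ["file-a", "file-b"],
    "chunks": ["Llamas eat grass.", "Llamas hum to each other."],
    "scores": [0.91, 0.87],
    "citation_files": {"file-a": "llama_diet.txt", "file-b": "llama_social.txt"},
}
```
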


@ -27,6 +27,7 @@ class ToolExecutionResult(BaseModel):
sequence_number: int
final_output_message: OpenAIResponseOutput | None = None
final_input_message: OpenAIMessageParam | None = None
citation_files: dict[str, str] | None = None
@dataclass


@ -4,9 +4,11 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import re
import uuid
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseAnnotationFileCitation,
OpenAIResponseInput,
OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputMessageContent,
@ -45,7 +47,9 @@ from llama_stack.apis.inference import (
)
async def convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenAIResponseMessage:
async def convert_chat_choice_to_response_message(
choice: OpenAIChoice, citation_files: dict[str, str] | None = None
) -> OpenAIResponseMessage:
"""Convert an OpenAI Chat Completion choice into an OpenAI Response output message."""
output_content = ""
if isinstance(choice.message.content, str):
@ -57,9 +61,11 @@ async def convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenA
f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}"
)
annotations, clean_text = _extract_citations_from_text(output_content, citation_files or {})
return OpenAIResponseMessage(
id=f"msg_{uuid.uuid4()}",
content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=annotations)],
status="completed",
role="assistant",
)
@ -200,6 +206,53 @@ async def get_message_type_by_role(role: str):
return role_to_type.get(role)
def _extract_citations_from_text(
text: str, citation_files: dict[str, str]
) -> tuple[list[OpenAIResponseAnnotationFileCitation], str]:
"""Extract citation markers from text and create annotations
Args:
text: The text containing citation markers like [file-Cn3MSNn72ENTiiq11Qda4A]
citation_files: Dictionary mapping file_id to filename
Returns:
Tuple of (annotations_list, clean_text_without_markers)
"""
file_id_regex = re.compile(r"<\|(?P<file_id>file-[A-Za-z0-9_-]+)\|>")
annotations = []
parts = []
total_len = 0
last_end = 0
for m in file_id_regex.finditer(text):
# segment before the marker
prefix = text[last_end : m.start()]
# drop one space if it exists (since marker is at sentence end)
if prefix.endswith(" "):
prefix = prefix[:-1]
parts.append(prefix)
total_len += len(prefix)
fid = m.group(1)
if fid in citation_files:
annotations.append(
OpenAIResponseAnnotationFileCitation(
file_id=fid,
filename=citation_files[fid],
index=total_len, # index points to punctuation
)
)
last_end = m.end()
parts.append(text[last_end:])
cleaned_text = "".join(parts)
return annotations, cleaned_text
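
To make the index bookkeeping concrete, here is a hypothetical call to the helper above (the file id, filename, and text are invented):

```python
text = "Llamas are vegetarian <|file-abc123|>."
files = {"file-abc123": "llama_facts.txt"}
annotations, clean = _extract_citations_from_text(text, files)

assert clean == "Llamas are vegetarian."
assert annotations[0].filename == "llama_facts.txt"
assert annotations[0].index == 21  # the index points at the trailing period
```
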
def is_function_tool_call(
tool_call: OpenAIChatCompletionToolCall,
tools: list[OpenAIResponseInputTool],


@ -331,5 +331,8 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
return ToolInvocationResult(
content=result.content or [],
metadata=result.metadata,
metadata={
**(result.metadata or {}),
"citation_files": getattr(result, "citation_files", None),
},
)


@ -200,12 +200,10 @@ class FaissIndex(EmbeddingIndex):
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.files_api = files_api
self.cache: dict[str, VectorDBWithIndex] = {}
self.kvstore: KVStore | None = None
self.openai_vector_stores: dict[str, dict[str, Any]] = {}
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.kvstore)


@ -410,12 +410,10 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
"""
def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.files_api = files_api
self.cache: dict[str, VectorDBWithIndex] = {}
self.openai_vector_stores: dict[str, dict[str, Any]] = {}
self.kvstore: KVStore | None = None
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.kvstore)


@ -36,6 +36,9 @@ def available_providers() -> list[ProviderSpec]:
Api.tool_runtime,
Api.tool_groups,
],
optional_api_dependencies=[
Api.telemetry,
],
description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
),
]


@ -11,6 +11,7 @@ from llama_stack.providers.datatypes import (
ProviderSpec,
RemoteProviderSpec,
)
from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
def available_providers() -> list[ProviderSpec]:
@ -18,9 +19,8 @@ def available_providers() -> list[ProviderSpec]:
InlineProviderSpec(
api=Api.tool_runtime,
provider_type="inline::rag-runtime",
pip_packages=[
"chardet",
"pypdf",
pip_packages=DEFAULT_VECTOR_IO_DEPS
+ [
"tqdm",
"numpy",
"scikit-learn",


@ -12,13 +12,16 @@ from llama_stack.providers.datatypes import (
RemoteProviderSpec,
)
# Common dependencies for all vector IO providers that support document processing
DEFAULT_VECTOR_IO_DEPS = ["chardet", "pypdf"]
def available_providers() -> list[ProviderSpec]:
return [
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::meta-reference",
pip_packages=["faiss-cpu"],
pip_packages=["faiss-cpu"] + DEFAULT_VECTOR_IO_DEPS,
module="llama_stack.providers.inline.vector_io.faiss",
config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
deprecation_warning="Please use the `inline::faiss` provider instead.",
@ -29,7 +32,7 @@ def available_providers() -> list[ProviderSpec]:
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::faiss",
pip_packages=["faiss-cpu"],
pip_packages=["faiss-cpu"] + DEFAULT_VECTOR_IO_DEPS,
module="llama_stack.providers.inline.vector_io.faiss",
config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
api_dependencies=[Api.inference],
@ -82,7 +85,7 @@ more details about Faiss in general.
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::sqlite-vec",
pip_packages=["sqlite-vec"],
pip_packages=["sqlite-vec"] + DEFAULT_VECTOR_IO_DEPS,
module="llama_stack.providers.inline.vector_io.sqlite_vec",
config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
api_dependencies=[Api.inference],
@ -289,7 +292,7 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::sqlite_vec",
pip_packages=["sqlite-vec"],
pip_packages=["sqlite-vec"] + DEFAULT_VECTOR_IO_DEPS,
module="llama_stack.providers.inline.vector_io.sqlite_vec",
config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig",
deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.",
@ -303,7 +306,7 @@ Please refer to the sqlite-vec provider documentation.
api=Api.vector_io,
adapter_type="chromadb",
provider_type="remote::chromadb",
pip_packages=["chromadb-client"],
pip_packages=["chromadb-client"] + DEFAULT_VECTOR_IO_DEPS,
module="llama_stack.providers.remote.vector_io.chroma",
config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig",
api_dependencies=[Api.inference],
@ -345,7 +348,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::chromadb",
pip_packages=["chromadb"],
pip_packages=["chromadb"] + DEFAULT_VECTOR_IO_DEPS,
module="llama_stack.providers.inline.vector_io.chroma",
config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig",
api_dependencies=[Api.inference],
@ -389,7 +392,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
api=Api.vector_io,
adapter_type="pgvector",
provider_type="remote::pgvector",
pip_packages=["psycopg2-binary"],
pip_packages=["psycopg2-binary"] + DEFAULT_VECTOR_IO_DEPS,
module="llama_stack.providers.remote.vector_io.pgvector",
config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig",
api_dependencies=[Api.inference],
@ -500,7 +503,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de
api=Api.vector_io,
adapter_type="weaviate",
provider_type="remote::weaviate",
pip_packages=["weaviate-client>=4.16.5"],
pip_packages=["weaviate-client>=4.16.5"] + DEFAULT_VECTOR_IO_DEPS,
module="llama_stack.providers.remote.vector_io.weaviate",
config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig",
provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData",
@ -541,7 +544,7 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::qdrant",
pip_packages=["qdrant-client"],
pip_packages=["qdrant-client"] + DEFAULT_VECTOR_IO_DEPS,
module="llama_stack.providers.inline.vector_io.qdrant",
config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig",
api_dependencies=[Api.inference],
@ -594,7 +597,7 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta
api=Api.vector_io,
adapter_type="qdrant",
provider_type="remote::qdrant",
pip_packages=["qdrant-client"],
pip_packages=["qdrant-client"] + DEFAULT_VECTOR_IO_DEPS,
module="llama_stack.providers.remote.vector_io.qdrant",
config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig",
api_dependencies=[Api.inference],
@ -607,7 +610,7 @@ Please refer to the inline provider documentation.
api=Api.vector_io,
adapter_type="milvus",
provider_type="remote::milvus",
pip_packages=["pymilvus>=2.4.10"],
pip_packages=["pymilvus>=2.4.10"] + DEFAULT_VECTOR_IO_DEPS,
module="llama_stack.providers.remote.vector_io.milvus",
config_class="llama_stack.providers.remote.vector_io.milvus.MilvusVectorIOConfig",
api_dependencies=[Api.inference],
@ -813,7 +816,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
InlineProviderSpec(
api=Api.vector_io,
provider_type="inline::milvus",
pip_packages=["pymilvus[milvus-lite]>=2.4.10"],
pip_packages=["pymilvus[milvus-lite]>=2.4.10"] + DEFAULT_VECTOR_IO_DEPS,
module="llama_stack.providers.inline.vector_io.milvus",
config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig",
api_dependencies=[Api.inference],


@ -41,9 +41,6 @@ class DatabricksInferenceAdapter(OpenAIMixin):
).serving_endpoints.list() # TODO: this is not async
]
async def should_refresh_models(self) -> bool:
return False
async def openai_completion(
self,
model: str,

View file

@ -5,7 +5,7 @@
# the root directory of this source tree.
from typing import Any
from llama_stack.apis.inference.inference import OpenAICompletion
from llama_stack.apis.inference.inference import OpenAICompletion, OpenAIEmbeddingsResponse
from llama_stack.log import get_logger
from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@ -56,3 +56,13 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
suffix: str | None = None,
) -> OpenAICompletion:
raise NotImplementedError()
async def openai_embeddings(
self,
model: str,
input: str | list[str],
encoding_format: str | None = "float",
dimensions: int | None = None,
user: str | None = None,
) -> OpenAIEmbeddingsResponse:
raise NotImplementedError()

View file

@ -6,8 +6,6 @@
from typing import Any
from pydantic import Field
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
DEFAULT_OLLAMA_URL = "http://localhost:11434"
@ -15,10 +13,6 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434"
class OllamaImplConfig(RemoteInferenceProviderConfig):
url: str = DEFAULT_OLLAMA_URL
refresh_models: bool = Field(
default=False,
description="Whether to refresh models periodically",
)
@classmethod
def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]:

View file

@ -72,9 +72,6 @@ class OllamaInferenceAdapter(OpenAIMixin):
f"Ollama Server is not running (message: {r['message']}). Make sure to start it using `ollama serve` in a separate terminal"
)
async def should_refresh_models(self) -> bool:
return self.config.refresh_models
async def health(self) -> HealthResponse:
"""
Performs a health check by verifying connectivity to the Ollama server.

View file

@ -11,6 +11,6 @@ async def get_adapter_impl(config: RunpodImplConfig, _deps):
from .runpod import RunpodInferenceAdapter
assert isinstance(config, RunpodImplConfig), f"Unexpected config type: {type(config)}"
impl = RunpodInferenceAdapter(config)
impl = RunpodInferenceAdapter(config=config)
await impl.initialize()
return impl

View file

@ -4,69 +4,86 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from llama_stack.apis.inference import * # noqa: F403
from llama_stack.apis.inference import OpenAIEmbeddingsResponse
# from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, build_hf_repo_model_entry
from llama_stack.providers.utils.inference.openai_compat import (
get_sampling_options,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
chat_completion_request_to_prompt,
from llama_stack.apis.inference import (
OpenAIMessageParam,
OpenAIResponseFormatParam,
)
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from .config import RunpodImplConfig
# https://docs.runpod.io/serverless/vllm/overview#compatible-models
# https://github.com/runpod-workers/worker-vllm/blob/main/README.md#compatible-model-architectures
RUNPOD_SUPPORTED_MODELS = {
"Llama3.1-8B": "meta-llama/Llama-3.1-8B",
"Llama3.1-70B": "meta-llama/Llama-3.1-70B",
"Llama3.1-405B:bf16-mp8": "meta-llama/Llama-3.1-405B",
"Llama3.1-405B": "meta-llama/Llama-3.1-405B-FP8",
"Llama3.1-405B:bf16-mp16": "meta-llama/Llama-3.1-405B",
"Llama3.1-8B-Instruct": "meta-llama/Llama-3.1-8B-Instruct",
"Llama3.1-70B-Instruct": "meta-llama/Llama-3.1-70B-Instruct",
"Llama3.1-405B-Instruct:bf16-mp8": "meta-llama/Llama-3.1-405B-Instruct",
"Llama3.1-405B-Instruct": "meta-llama/Llama-3.1-405B-Instruct-FP8",
"Llama3.1-405B-Instruct:bf16-mp16": "meta-llama/Llama-3.1-405B-Instruct",
"Llama3.2-1B": "meta-llama/Llama-3.2-1B",
"Llama3.2-3B": "meta-llama/Llama-3.2-3B",
}
SAFETY_MODELS_ENTRIES = []
class RunpodInferenceAdapter(OpenAIMixin):
"""
Adapter for RunPod's OpenAI-compatible API endpoints.
Supports vLLM serverless endpoints, whether self-hosted or public.
Works with any RunPod endpoint that exposes an OpenAI-compatible API.
"""
# Create MODEL_ENTRIES from RUNPOD_SUPPORTED_MODELS for compatibility with starter template
MODEL_ENTRIES = [
build_hf_repo_model_entry(provider_model_id, model_descriptor)
for provider_model_id, model_descriptor in RUNPOD_SUPPORTED_MODELS.items()
] + SAFETY_MODELS_ENTRIES
config: RunpodImplConfig
def get_api_key(self) -> str:
"""Get API key for OpenAI client."""
return self.config.api_token
class RunpodInferenceAdapter(
ModelRegistryHelper,
Inference,
):
def __init__(self, config: RunpodImplConfig) -> None:
ModelRegistryHelper.__init__(self, stack_to_provider_models_map=RUNPOD_SUPPORTED_MODELS)
self.config = config
def get_base_url(self) -> str:
"""Get base URL for OpenAI client."""
return self.config.url
def _get_params(self, request: ChatCompletionRequest) -> dict:
return {
"model": self.map_to_provider_model(request.model),
"prompt": chat_completion_request_to_prompt(request),
"stream": request.stream,
**get_sampling_options(request.sampling_params),
}
async def openai_embeddings(
async def openai_chat_completion(
self,
model: str,
input: str | list[str],
encoding_format: str | None = "float",
dimensions: int | None = None,
messages: list[OpenAIMessageParam],
frequency_penalty: float | None = None,
function_call: str | dict[str, Any] | None = None,
functions: list[dict[str, Any]] | None = None,
logit_bias: dict[str, float] | None = None,
logprobs: bool | None = None,
max_completion_tokens: int | None = None,
max_tokens: int | None = None,
n: int | None = None,
parallel_tool_calls: bool | None = None,
presence_penalty: float | None = None,
response_format: OpenAIResponseFormatParam | None = None,
seed: int | None = None,
stop: str | list[str] | None = None,
stream: bool | None = None,
stream_options: dict[str, Any] | None = None,
temperature: float | None = None,
tool_choice: str | dict[str, Any] | None = None,
tools: list[dict[str, Any]] | None = None,
top_logprobs: int | None = None,
top_p: float | None = None,
user: str | None = None,
) -> OpenAIEmbeddingsResponse:
raise NotImplementedError()
):
"""Override to add RunPod-specific stream_options requirement."""
if stream and not stream_options:
stream_options = {"include_usage": True}
return await super().openai_chat_completion(
model=model,
messages=messages,
frequency_penalty=frequency_penalty,
function_call=function_call,
functions=functions,
logit_bias=logit_bias,
logprobs=logprobs,
max_completion_tokens=max_completion_tokens,
max_tokens=max_tokens,
n=n,
parallel_tool_calls=parallel_tool_calls,
presence_penalty=presence_penalty,
response_format=response_format,
seed=seed,
stop=stop,
stream=stream,
stream_options=stream_options,
temperature=temperature,
tool_choice=tool_choice,
tools=tools,
top_logprobs=top_logprobs,
top_p=top_p,
user=user,
)
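
Editorial note: the override above exists only to inject a streaming default — when the caller streams without stream_options, RunPod requests are given stream_options={"include_usage": True} before everything is forwarded to the mixin. A hypothetical call illustrating the effect (adapter construction, model id, and messages are made up):

# `adapter` is assumed to be a constructed RunpodInferenceAdapter.
stream = await adapter.openai_chat_completion(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "hello"}],
    stream=True,  # no stream_options passed...
)
# ...so the override forwards stream_options={"include_usage": True},
# ensuring the final stream chunk carries token usage.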

View file

@ -63,12 +63,6 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
# Together's /v1/models is not compatible with OpenAI's /v1/models. Together support ticket #13355 -> will not fix, use Together's own client
return [m.id for m in await self._get_client().models.list()]
async def should_refresh_models(self) -> bool:
return True
async def check_model_availability(self, model):
return model in self._model_cache
async def openai_embeddings(
self,
model: str,

View file

@ -30,10 +30,6 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
default=True,
description="Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file.",
)
refresh_models: bool = Field(
default=False,
description="Whether to refresh models periodically",
)
@field_validator("tls_verify")
@classmethod

View file

@ -19,7 +19,6 @@ from llama_stack.apis.inference import (
OpenAIResponseFormatParam,
ToolChoice,
)
from llama_stack.apis.models import Model, ModelType
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
HealthResponse,
@ -54,25 +53,6 @@ class VLLMInferenceAdapter(OpenAIMixin):
"You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM."
)
async def should_refresh_models(self) -> bool:
# Strictly respecting the refresh_models directive
return self.config.refresh_models
async def list_models(self) -> list[Model] | None:
models = []
async for m in self.client.models.list():
model_type = ModelType.llm # unclear how to determine embedding vs. llm models
models.append(
Model(
identifier=m.id,
provider_resource_id=m.id,
provider_id=self.__provider_id__, # type: ignore[attr-defined]
metadata={},
model_type=model_type,
)
)
return models
async def health(self) -> HealthResponse:
"""
Performs a health check by verifying connectivity to the remote vLLM server.

View file

@ -140,14 +140,13 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
inference_api: Api.inference,
files_api: Files | None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
log.info(f"Initializing ChromaVectorIOAdapter with url: {config}")
self.config = config
self.inference_api = inference_api
self.client = None
self.cache = {}
self.kvstore: KVStore | None = None
self.vector_db_store = None
self.files_api = files_api
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.kvstore)

View file

@ -309,14 +309,12 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
inference_api: Inference,
files_api: Files | None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.cache = {}
self.client = None
self.inference_api = inference_api
self.files_api = files_api
self.kvstore: KVStore | None = None
self.vector_db_store = None
self.openai_vector_stores: dict[str, dict[str, Any]] = {}
self.metadata_collection_name = "openai_vector_stores_metadata"
async def initialize(self) -> None:

View file

@ -345,14 +345,12 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
inference_api: Api.inference,
files_api: Files | None = None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.conn = None
self.cache = {}
self.files_api = files_api
self.kvstore: KVStore | None = None
self.vector_db_store = None
self.openai_vector_stores: dict[str, dict[str, Any]] = {}
self.metadata_collection_name = "openai_vector_stores_metadata"
async def initialize(self) -> None:

View file

@ -27,7 +27,7 @@ from llama_stack.apis.vector_io import (
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import (
ChunkForDeletion,
@ -162,14 +162,12 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
inference_api: Api.inference,
files_api: Files | None = None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.client: AsyncQdrantClient = None
self.cache = {}
self.inference_api = inference_api
self.files_api = files_api
self.vector_db_store = None
self.kvstore: KVStore | None = None
self.openai_vector_stores: dict[str, dict[str, Any]] = {}
self._qdrant_lock = asyncio.Lock()
async def initialize(self) -> None:

View file

@ -284,14 +284,12 @@ class WeaviateVectorIOAdapter(
inference_api: Api.inference,
files_api: Files | None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.client_cache = {}
self.cache = {}
self.files_api = files_api
self.kvstore: KVStore | None = None
self.vector_db_store = None
self.openai_vector_stores: dict[str, dict[str, Any]] = {}
self.metadata_collection_name = "openai_vector_stores_metadata"
def _get_client(self) -> weaviate.WeaviateClient:

View file

@ -24,6 +24,10 @@ class RemoteInferenceProviderConfig(BaseModel):
default=None,
description="List of models that should be registered with the model registry. If None, all models are allowed.",
)
refresh_models: bool = Field(
default=False,
description="Whether to refresh models periodically from the provider",
)
# TODO: this class is more confusing than useful right now. We need to make it
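
Editorial note: with the per-provider refresh_models fields deleted above (Ollama, vLLM) and the hard-coded should_refresh_models overrides removed (Databricks, Ollama, Together, vLLM), this base-class field becomes the single switch. A minimal sketch of the consolidated config, assuming a pydantic BaseModel base as the diff shows:

from pydantic import BaseModel, Field

class RemoteInferenceProviderConfig(BaseModel):
    allowed_models: list[str] | None = Field(
        default=None,
        description="List of models that should be registered with the model registry. If None, all models are allowed.",
    )
    refresh_models: bool = Field(
        default=False,
        description="Whether to refresh models periodically from the provider",
    )

# Providers now inherit the flag instead of redefining it:
class OllamaImplConfig(RemoteInferenceProviderConfig):
    url: str = "http://localhost:11434"

assert OllamaImplConfig(refresh_models=True).refresh_models is True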

View file

@ -132,7 +132,10 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
:return: An iterable of model IDs or None if not implemented
"""
return [m.id async for m in self.client.models.list()]
client = self.client
async with client:
model_ids = [m.id async for m in client.models.list()]
return model_ids
async def initialize(self) -> None:
"""
@ -481,7 +484,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
return model in self._model_cache
async def should_refresh_models(self) -> bool:
return False
return self.config.refresh_models
#
# The model_dump implementations are to avoid serializing the extra fields,
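
Editorial note: two changes in this OpenAIMixin hunk read together — model listing now wraps the client in an async context manager so the underlying HTTP session is closed after the scan, and should_refresh_models defers to the new config flag instead of returning a constant. A sketch of the listing pattern, assuming an AsyncOpenAI-style client:

# Sketch only; `client` is assumed to behave like openai.AsyncOpenAI, whose
# async context manager closes the transport on exit.
async def list_provider_model_ids(client) -> list[str]:
    async with client:
        return [m.id async for m in client.models.list()]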

View file

@ -12,6 +12,8 @@ import uuid
from abc import ABC, abstractmethod
from typing import Any
from pydantic import TypeAdapter
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files, OpenAIFileObject
from llama_stack.apis.vector_dbs import VectorDB
@ -50,12 +52,16 @@ logger = get_logger(name=__name__, category="providers::utils")
# Constants for OpenAI vector stores
CHUNK_MULTIPLIER = 5
FILE_BATCH_CLEANUP_INTERVAL_SECONDS = 24 * 60 * 60 # 1 day in seconds
MAX_CONCURRENT_FILES_PER_BATCH = 3 # Maximum concurrent file processing within a batch
FILE_BATCH_CHUNK_SIZE = 10 # Process files in chunks of this size
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:{VERSION}::"
OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX = f"openai_vector_stores_file_batches:{VERSION}::"
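
Editorial note: the new file-batch prefix follows the same versioned key scheme as the other constants; range reads later in this file bound the scan with a "\xff" sentinel. A small sketch of the key layout (batch id is hypothetical):

VERSION = "v3"
PREFIX = f"openai_vector_stores_file_batches:{VERSION}::"

key = f"{PREFIX}batch_123"            # hypothetical batch id
start, end = PREFIX, PREFIX + "\xff"  # values_in_range(start, end) -> all batches
assert start <= key < end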
class OpenAIVectorStoreMixin(ABC):
@ -65,11 +71,15 @@ class OpenAIVectorStoreMixin(ABC):
an openai_vector_stores in-memory cache.
"""
# These should be provided by the implementing class
openai_vector_stores: dict[str, dict[str, Any]]
files_api: Files | None
# KV store for persisting OpenAI vector store metadata
kvstore: KVStore | None
# Implementing classes should call super().__init__() in their __init__ method
# to properly initialize the mixin attributes.
def __init__(self, files_api: Files | None = None, kvstore: KVStore | None = None):
self.openai_vector_stores: dict[str, dict[str, Any]] = {}
self.openai_file_batches: dict[str, dict[str, Any]] = {}
self.files_api = files_api
self.kvstore = kvstore
self._last_file_batch_cleanup_time = 0
self._file_batch_tasks: dict[str, asyncio.Task[None]] = {}
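
Editorial note: the five adapter hunks above (Chroma, Milvus, PGVector, Qdrant, Weaviate) all collapse the same boilerplate into this constructor. A sketch of what an adapter __init__ now looks like, with hypothetical names:

class ExampleVectorIOAdapter(OpenAIVectorStoreMixin):
    def __init__(self, config, inference_api, files_api=None) -> None:
        # kvstore stays None until initialize() builds it from config.kvstore.
        super().__init__(files_api=files_api, kvstore=None)
        self.config = config
        self.inference_api = inference_api
        self.cache = {}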
async def _save_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None:
"""Save vector store metadata to persistent storage."""
@ -159,9 +169,129 @@ class OpenAIVectorStoreMixin(ABC):
for idx in range(len(raw_items)):
await self.kvstore.delete(f"{contents_prefix}{idx}")
async def _save_openai_vector_store_file_batch(self, batch_id: str, batch_info: dict[str, Any]) -> None:
"""Save file batch metadata to persistent storage."""
assert self.kvstore
key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}{batch_id}"
await self.kvstore.set(key=key, value=json.dumps(batch_info))
# update in-memory cache
self.openai_file_batches[batch_id] = batch_info
async def _load_openai_vector_store_file_batches(self) -> dict[str, dict[str, Any]]:
"""Load all file batch metadata from persistent storage."""
assert self.kvstore
start_key = OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX
end_key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}\xff"
stored_data = await self.kvstore.values_in_range(start_key, end_key)
batches: dict[str, dict[str, Any]] = {}
for item in stored_data:
info = json.loads(item)
batches[info["id"]] = info
return batches
async def _delete_openai_vector_store_file_batch(self, batch_id: str) -> None:
"""Delete file batch metadata from persistent storage and in-memory cache."""
assert self.kvstore
key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}{batch_id}"
await self.kvstore.delete(key)
# remove from in-memory cache
self.openai_file_batches.pop(batch_id, None)
async def _cleanup_expired_file_batches(self) -> None:
"""Clean up expired file batches from persistent storage."""
assert self.kvstore
start_key = OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX
end_key = f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}\xff"
stored_data = await self.kvstore.values_in_range(start_key, end_key)
current_time = int(time.time())
expired_count = 0
for item in stored_data:
info = json.loads(item)
expires_at = info.get("expires_at")
if expires_at and current_time > expires_at:
logger.info(f"Cleaning up expired file batch: {info['id']}")
await self.kvstore.delete(f"{OPENAI_VECTOR_STORES_FILE_BATCHES_PREFIX}{info['id']}")
# Remove from in-memory cache if present
self.openai_file_batches.pop(info["id"], None)
expired_count += 1
if expired_count > 0:
logger.info(f"Cleaned up {expired_count} expired file batches")
async def _get_completed_files_in_batch(self, vector_store_id: str, file_ids: list[str]) -> set[str]:
"""Determine which files in a batch are actually completed by checking vector store file_ids."""
if vector_store_id not in self.openai_vector_stores:
return set()
store_info = self.openai_vector_stores[vector_store_id]
completed_files = set(file_ids) & set(store_info["file_ids"])
return completed_files
async def _analyze_batch_completion_on_resume(self, batch_id: str, batch_info: dict[str, Any]) -> list[str]:
"""Analyze batch completion status and return remaining files to process.
Returns:
List of file IDs that still need processing. Empty list if batch is complete.
"""
vector_store_id = batch_info["vector_store_id"]
all_file_ids = batch_info["file_ids"]
# Find files that are actually completed
completed_files = await self._get_completed_files_in_batch(vector_store_id, all_file_ids)
remaining_files = [file_id for file_id in all_file_ids if file_id not in completed_files]
completed_count = len(completed_files)
total_count = len(all_file_ids)
remaining_count = len(remaining_files)
# Update file counts to reflect actual state
batch_info["file_counts"] = {
"completed": completed_count,
"failed": 0, # We don't track failed files during resume - they'll be retried
"in_progress": remaining_count,
"cancelled": 0,
"total": total_count,
}
# If all files are already completed, mark batch as completed
if remaining_count == 0:
batch_info["status"] = "completed"
logger.info(f"Batch {batch_id} is already fully completed, updating status")
# Save updated batch info
await self._save_openai_vector_store_file_batch(batch_id, batch_info)
return remaining_files
async def _resume_incomplete_batches(self) -> None:
"""Resume processing of incomplete file batches after server restart."""
for batch_id, batch_info in self.openai_file_batches.items():
if batch_info["status"] == "in_progress":
logger.info(f"Analyzing incomplete file batch: {batch_id}")
remaining_files = await self._analyze_batch_completion_on_resume(batch_id, batch_info)
# Check if batch is now completed after analysis
if batch_info["status"] == "completed":
continue
if remaining_files:
logger.info(f"Resuming batch {batch_id} with {len(remaining_files)} remaining files")
# Restart the background processing task with only remaining files
task = asyncio.create_task(self._process_file_batch_async(batch_id, batch_info, remaining_files))
self._file_batch_tasks[batch_id] = task
async def initialize_openai_vector_stores(self) -> None:
"""Load existing OpenAI vector stores into the in-memory cache."""
"""Load existing OpenAI vector stores and file batches into the in-memory cache."""
self.openai_vector_stores = await self._load_openai_vector_stores()
self.openai_file_batches = await self._load_openai_vector_store_file_batches()
self._file_batch_tasks = {}
# TODO: Resume only works for single worker deployment. Jobs with multiple workers will need to be handled differently.
await self._resume_incomplete_batches()
self._last_file_batch_cleanup_time = 0
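
Editorial note: the resume path never trusts the persisted counters — it recomputes progress by intersecting the batch's file_ids with the vector store's file_ids, so work finished before a restart is not repeated. The core computation, extracted as a standalone sketch:

def remaining_files(batch_file_ids: list[str], store_file_ids: list[str]) -> list[str]:
    # Mirrors _get_completed_files_in_batch plus the remaining-file filter above.
    completed = set(batch_file_ids) & set(store_file_ids)
    return [f for f in batch_file_ids if f not in completed]

assert remaining_files(["f1", "f2", "f3"], ["f1"]) == ["f2", "f3"]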
@abstractmethod
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
@ -457,7 +587,7 @@ class OpenAIVectorStoreMixin(ABC):
content = self._chunk_to_vector_store_content(chunk)
response_data_item = VectorStoreSearchResponse(
file_id=chunk.metadata.get("file_id", ""),
file_id=chunk.metadata.get("document_id", ""),
filename=chunk.metadata.get("filename", ""),
score=score,
attributes=chunk.metadata,
@ -570,6 +700,14 @@ class OpenAIVectorStoreMixin(ABC):
if vector_store_id not in self.openai_vector_stores:
raise VectorStoreNotFoundError(vector_store_id)
# Check if file is already attached to this vector store
store_info = self.openai_vector_stores[vector_store_id]
if file_id in store_info["file_ids"]:
logger.warning(f"File {file_id} is already attached to vector store {vector_store_id}, skipping")
# Return existing file object
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
return VectorStoreFileObject(**file_info)
attributes = attributes or {}
chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto()
created_at = int(time.time())
@ -608,14 +746,16 @@ class OpenAIVectorStoreMixin(ABC):
content = content_from_data_and_mime_type(content_response.body, mime_type)
chunk_attributes = attributes.copy()
chunk_attributes["filename"] = file_response.filename
chunks = make_overlapped_chunks(
file_id,
content,
max_chunk_size_tokens,
chunk_overlap_tokens,
attributes,
chunk_attributes,
)
if not chunks:
vector_store_file_object.status = "failed"
vector_store_file_object.last_error = VectorStoreFileLastError(
@ -828,7 +968,230 @@ class OpenAIVectorStoreMixin(ABC):
chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileBatchObject:
"""Create a vector store file batch."""
raise NotImplementedError("openai_create_vector_store_file_batch is not implemented yet")
if vector_store_id not in self.openai_vector_stores:
raise VectorStoreNotFoundError(vector_store_id)
chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto()
created_at = int(time.time())
batch_id = f"batch_{uuid.uuid4()}"
# File batches expire after 7 days
expires_at = created_at + (7 * 24 * 60 * 60)
# Initialize batch file counts - all files start as in_progress
file_counts = VectorStoreFileCounts(
completed=0,
cancelled=0,
failed=0,
in_progress=len(file_ids),
total=len(file_ids),
)
# Create batch object immediately with in_progress status
batch_object = VectorStoreFileBatchObject(
id=batch_id,
created_at=created_at,
vector_store_id=vector_store_id,
status="in_progress",
file_counts=file_counts,
)
batch_info = {
**batch_object.model_dump(),
"file_ids": file_ids,
"attributes": attributes,
"chunking_strategy": chunking_strategy.model_dump(),
"expires_at": expires_at,
}
await self._save_openai_vector_store_file_batch(batch_id, batch_info)
# Start background processing of files
task = asyncio.create_task(self._process_file_batch_async(batch_id, batch_info))
self._file_batch_tasks[batch_id] = task
# Run cleanup if needed (throttled to once every 1 day)
current_time = int(time.time())
if current_time - self._last_file_batch_cleanup_time >= FILE_BATCH_CLEANUP_INTERVAL_SECONDS:
logger.info("Running throttled cleanup of expired file batches")
asyncio.create_task(self._cleanup_expired_file_batches())
self._last_file_batch_cleanup_time = current_time
return batch_object
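
Editorial note: creation returns immediately with an in_progress object while a background task does the indexing, and a throttled cleanup task runs at most once per FILE_BATCH_CLEANUP_INTERVAL_SECONDS. A hypothetical usage sketch (`store` is an implementing adapter; the ids are made up):

batch = await store.openai_create_vector_store_file_batch(
    vector_store_id="vs_123",
    file_ids=["file_a", "file_b"],
)
assert batch.status == "in_progress"  # returned before any file is processed

# Poll until the background task finishes:
batch = await store.openai_retrieve_vector_store_file_batch(batch.id, "vs_123")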
async def _process_files_with_concurrency(
self,
file_ids: list[str],
vector_store_id: str,
attributes: dict[str, Any],
chunking_strategy_obj: Any,
batch_id: str,
batch_info: dict[str, Any],
) -> None:
"""Process files with controlled concurrency and chunking."""
semaphore = asyncio.Semaphore(MAX_CONCURRENT_FILES_PER_BATCH)
async def process_single_file(file_id: str) -> tuple[str, bool]:
"""Process a single file with concurrency control."""
async with semaphore:
try:
vector_store_file_object = await self.openai_attach_file_to_vector_store(
vector_store_id=vector_store_id,
file_id=file_id,
attributes=attributes,
chunking_strategy=chunking_strategy_obj,
)
return file_id, vector_store_file_object.status == "completed"
except Exception as e:
logger.error(f"Failed to process file {file_id} in batch {batch_id}: {e}")
return file_id, False
# Process files in chunks to avoid creating too many tasks at once
total_files = len(file_ids)
for chunk_start in range(0, total_files, FILE_BATCH_CHUNK_SIZE):
chunk_end = min(chunk_start + FILE_BATCH_CHUNK_SIZE, total_files)
chunk = file_ids[chunk_start:chunk_end]
chunk_num = chunk_start // FILE_BATCH_CHUNK_SIZE + 1
total_chunks = (total_files + FILE_BATCH_CHUNK_SIZE - 1) // FILE_BATCH_CHUNK_SIZE
logger.info(
f"Processing chunk {chunk_num} of {total_chunks} ({len(chunk)} files, {chunk_start + 1}-{chunk_end} of {total_files} total files)"
)
async with asyncio.TaskGroup() as tg:
chunk_tasks = [tg.create_task(process_single_file(file_id)) for file_id in chunk]
chunk_results = [task.result() for task in chunk_tasks]
# Update counts after each chunk for progressive feedback
for _, success in chunk_results:
self._update_file_counts(batch_info, success=success)
# Save progress after each chunk
await self._save_openai_vector_store_file_batch(batch_id, batch_info)
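
Editorial note: the method above layers two limits — a semaphore caps files in flight at MAX_CONCURRENT_FILES_PER_BATCH, while TaskGroup chunks of FILE_BATCH_CHUNK_SIZE keep the live-task count small and let progress be persisted between chunks. A self-contained sketch of the same pattern (asyncio.TaskGroup requires Python 3.11+):

import asyncio

MAX_CONCURRENT = 3  # mirrors MAX_CONCURRENT_FILES_PER_BATCH
CHUNK_SIZE = 10     # mirrors FILE_BATCH_CHUNK_SIZE

async def process_all(items, worker):
    sem = asyncio.Semaphore(MAX_CONCURRENT)

    async def guarded(item):
        async with sem:  # at most MAX_CONCURRENT workers run concurrently
            return await worker(item)

    results = []
    for start in range(0, len(items), CHUNK_SIZE):
        chunk = items[start : start + CHUNK_SIZE]
        async with asyncio.TaskGroup() as tg:
            tasks = [tg.create_task(guarded(i)) for i in chunk]
        results.extend(t.result() for t in tasks)  # checkpoint progress here
    return results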
def _update_file_counts(self, batch_info: dict[str, Any], success: bool) -> None:
"""Update file counts based on processing result."""
if success:
batch_info["file_counts"]["completed"] += 1
else:
batch_info["file_counts"]["failed"] += 1
batch_info["file_counts"]["in_progress"] -= 1
def _update_batch_status(self, batch_info: dict[str, Any]) -> None:
"""Update final batch status based on file processing results."""
if batch_info["file_counts"]["failed"] == 0:
batch_info["status"] = "completed"
elif batch_info["file_counts"]["completed"] == 0:
batch_info["status"] = "failed"
else:
batch_info["status"] = "completed" # Partial success counts as completed
async def _process_file_batch_async(
self,
batch_id: str,
batch_info: dict[str, Any],
override_file_ids: list[str] | None = None,
) -> None:
"""Process files in a batch asynchronously in the background."""
file_ids = override_file_ids if override_file_ids is not None else batch_info["file_ids"]
attributes = batch_info["attributes"]
chunking_strategy = batch_info["chunking_strategy"]
vector_store_id = batch_info["vector_store_id"]
chunking_strategy_adapter: TypeAdapter[VectorStoreChunkingStrategy] = TypeAdapter(VectorStoreChunkingStrategy)
chunking_strategy_obj = chunking_strategy_adapter.validate_python(chunking_strategy)
try:
# Process all files with controlled concurrency
await self._process_files_with_concurrency(
file_ids=file_ids,
vector_store_id=vector_store_id,
attributes=attributes,
chunking_strategy_obj=chunking_strategy_obj,
batch_id=batch_id,
batch_info=batch_info,
)
# Update final batch status
self._update_batch_status(batch_info)
await self._save_openai_vector_store_file_batch(batch_id, batch_info)
logger.info(f"File batch {batch_id} processing completed with status: {batch_info['status']}")
except asyncio.CancelledError:
logger.info(f"File batch {batch_id} processing was cancelled")
# Clean up task reference if it still exists
self._file_batch_tasks.pop(batch_id, None)
raise # Re-raise to ensure proper cancellation propagation
finally:
# Always clean up task reference when processing ends
self._file_batch_tasks.pop(batch_id, None)
def _get_and_validate_batch(self, batch_id: str, vector_store_id: str) -> dict[str, Any]:
"""Get and validate batch exists and belongs to vector store."""
if vector_store_id not in self.openai_vector_stores:
raise VectorStoreNotFoundError(vector_store_id)
if batch_id not in self.openai_file_batches:
raise ValueError(f"File batch {batch_id} not found")
batch_info = self.openai_file_batches[batch_id]
# Check if batch has expired (read-only check)
expires_at = batch_info.get("expires_at")
if expires_at:
current_time = int(time.time())
if current_time > expires_at:
raise ValueError(f"File batch {batch_id} has expired after 7 days from creation")
if batch_info["vector_store_id"] != vector_store_id:
raise ValueError(f"File batch {batch_id} does not belong to vector store {vector_store_id}")
return batch_info
def _paginate_objects(
self,
objects: list[Any],
limit: int | None = 20,
after: str | None = None,
before: str | None = None,
) -> tuple[list[Any], bool, str | None, str | None]:
"""Apply pagination to a list of objects with id fields."""
limit = min(limit or 20, 100) # Cap at 100 as per OpenAI
# Find start index
start_idx = 0
if after:
for i, obj in enumerate(objects):
if obj.id == after:
start_idx = i + 1
break
# Find end index
end_idx = start_idx + limit
if before:
for i, obj in enumerate(objects[start_idx:], start_idx):
if obj.id == before:
end_idx = i
break
# Apply pagination
paginated_objects = objects[start_idx:end_idx]
# Determine pagination info
has_more = end_idx < len(objects)
first_id = paginated_objects[0].id if paginated_objects else None
last_id = paginated_objects[-1].id if paginated_objects else None
return paginated_objects, has_more, first_id, last_id
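
Editorial note: the helper implements OpenAI-style cursor pagination over any objects with an `id` attribute — `after` moves the window start, `before` clips the end, and the page size is capped at 100. A usage sketch with a hypothetical record type (`mixin` is assumed to be any OpenAIVectorStoreMixin instance):

from dataclasses import dataclass

@dataclass
class Rec:
    id: str

recs = [Rec("a"), Rec("b"), Rec("c"), Rec("d")]
page, has_more, first_id, last_id = mixin._paginate_objects(recs, limit=2, after="a")
# page == [Rec("b"), Rec("c")], has_more is True, first_id == "b", last_id == "c"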
async def openai_retrieve_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
) -> VectorStoreFileBatchObject:
"""Retrieve a vector store file batch."""
batch_info = self._get_and_validate_batch(batch_id, vector_store_id)
return VectorStoreFileBatchObject(**batch_info)
async def openai_list_files_in_vector_store_file_batch(
self,
@ -841,15 +1204,39 @@ class OpenAIVectorStoreMixin(ABC):
order: str | None = "desc",
) -> VectorStoreFilesListInBatchResponse:
"""Returns a list of vector store files in a batch."""
raise NotImplementedError("openai_list_files_in_vector_store_file_batch is not implemented yet")
batch_info = self._get_and_validate_batch(batch_id, vector_store_id)
batch_file_ids = batch_info["file_ids"]
async def openai_retrieve_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
) -> VectorStoreFileBatchObject:
"""Retrieve a vector store file batch."""
raise NotImplementedError("openai_retrieve_vector_store_file_batch is not implemented yet")
# Load file objects for files in this batch
batch_file_objects = []
for file_id in batch_file_ids:
try:
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
file_object = VectorStoreFileObject(**file_info)
# Apply status filter if provided
if filter and file_object.status != filter:
continue
batch_file_objects.append(file_object)
except Exception as e:
logger.warning(f"Could not load file {file_id} from batch {batch_id}: {e}")
continue
# Sort by created_at
reverse_order = order == "desc"
batch_file_objects.sort(key=lambda x: x.created_at, reverse=reverse_order)
# Apply pagination using helper
paginated_files, has_more, first_id, last_id = self._paginate_objects(batch_file_objects, limit, after, before)
return VectorStoreFilesListInBatchResponse(
data=paginated_files,
first_id=first_id,
last_id=last_id,
has_more=has_more,
)
async def openai_cancel_vector_store_file_batch(
self,
@ -857,4 +1244,24 @@ class OpenAIVectorStoreMixin(ABC):
vector_store_id: str,
) -> VectorStoreFileBatchObject:
"""Cancel a vector store file batch."""
raise NotImplementedError("openai_cancel_vector_store_file_batch is not implemented yet")
batch_info = self._get_and_validate_batch(batch_id, vector_store_id)
if batch_info["status"] not in ["in_progress"]:
raise ValueError(f"Cannot cancel batch {batch_id} with status {batch_info['status']}")
# Cancel the actual processing task if it exists
if batch_id in self._file_batch_tasks:
task = self._file_batch_tasks[batch_id]
if not task.done():
task.cancel()
logger.info(f"Cancelled processing task for file batch: {batch_id}")
# Remove from task tracking
del self._file_batch_tasks[batch_id]
batch_info["status"] = "cancelled"
await self._save_openai_vector_store_file_batch(batch_id, batch_info)
updated_batch = VectorStoreFileBatchObject(**batch_info)
return updated_batch
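
Editorial note: cancellation is only legal from in_progress — the background task is cancelled first (its CancelledError handler cleans up the task map), then the persisted status flips. Hypothetical usage (`store` and the ids are made up):

batch = await store.openai_cancel_vector_store_file_batch("batch_123", "vs_123")
assert batch.status == "cancelled"
# Cancelling a completed or already-cancelled batch raises ValueError.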

View file

@ -14,23 +14,22 @@
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-04T12:20:09.202126-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-06T16:41:45.231544-07:00",
"size": 590204928,
"size_vram": 590204928,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"family": "bert",
"families": [
"llama"
"bert"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
},
"context_length": 4096
"parameter_size": "23M",
"quantization_level": "F16"
}
}
]
}

View file

@ -0,0 +1,807 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "nomic-embed-text:latest",
"input": [
"This is the content of test file 2"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "nomic-embed-text:latest"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.036525182,
-0.0072787926,
-0.15320797,
-0.028591355,
0.028115708,
-0.0033384967,
0.021494914,
-0.023697548,
-0.059428893,
-0.04412936,
-0.014445912,
0.06520278,
0.013848802,
-0.029918822,
-0.022794332,
-0.012578859,
0.060358867,
-0.031223036,
-0.012306958,
-0.028883344,
-0.014677056,
-0.024171423,
-0.047258105,
-0.019668069,
0.10096786,
0.042677063,
-0.012945782,
0.05772575,
-0.09481949,
-0.013522372,
0.058091108,
-0.035321448,
0.02014728,
-0.06760144,
-0.012323442,
-0.045392025,
0.04685687,
0.024927035,
-0.0017673819,
0.036423087,
-0.020881223,
-0.010788712,
-0.01838111,
-0.007801951,
-0.011164214,
-0.022797823,
-0.01222212,
0.05638416,
-0.01662934,
-0.04117776,
0.004534807,
0.019233605,
-0.008680182,
0.03177389,
0.06082287,
-0.010224672,
-0.018689552,
-0.019074611,
0.029412521,
-0.06990004,
0.054043073,
0.027053045,
-0.049923293,
0.058975294,
0.0018301148,
-0.06718531,
-0.044889167,
0.032761537,
-0.022604113,
0.043496683,
0.08500273,
0.008184364,
0.0011824819,
-0.0417156,
-0.015855035,
-0.06935983,
0.01533393,
-0.03297617,
-0.043794934,
0.008973833,
0.0415081,
0.037018586,
0.004035694,
0.0067175985,
0.058073524,
-0.033033613,
-0.049569633,
-0.011724681,
-0.0049699075,
0.04405061,
0.02349984,
0.049434863,
0.05952279,
0.007926657,
-0.019564645,
0.028824113,
0.030559592,
0.044332445,
-0.03705847,
0.013914022,
-0.01584405,
0.012503536,
0.039434463,
0.020927113,
0.02458832,
0.033364173,
-0.0013068066,
0.025197528,
-0.05292493,
0.010358094,
-0.018871995,
0.039280638,
-0.048534855,
0.004642058,
0.011491514,
-0.036328327,
0.0637683,
-0.0360448,
-0.04317744,
0.03721341,
0.009880729,
-0.032810695,
0.012197031,
0.06644975,
0.04497407,
0.0018043267,
-0.076117076,
0.0028520897,
0.025521474,
-0.04780887,
-0.015784036,
-0.004914463,
-0.0003810333,
-0.008213055,
-0.0040868036,
0.0026211734,
0.005037653,
-0.0054035867,
-0.054472372,
-0.04214955,
-0.036636207,
0.005277914,
0.025802922,
0.054448027,
0.010910778,
-0.019098252,
0.06248315,
0.019785397,
-0.02148464,
-0.023303429,
0.0124828555,
-0.02455258,
0.0053893207,
0.006655952,
0.020618292,
-0.05195774,
0.001724354,
-0.049451906,
0.031900283,
0.08541784,
0.02900063,
0.006208959,
-0.009606019,
0.0030572556,
-0.018463623,
0.014401457,
0.0007510511,
0.08289015,
0.062720075,
-0.010840198,
-0.04971401,
-0.038808372,
0.0044536674,
0.011472072,
-0.031167375,
-0.031224154,
0.011706744,
-0.022990009,
0.04747808,
-0.0016337503,
0.015181135,
0.005154193,
0.00949444,
0.042812645,
0.001253686,
-0.050080713,
0.038098942,
-0.014367589,
-0.043111958,
-0.0059632747,
-0.022602718,
-0.0042201183,
-0.09451348,
-0.042164654,
-0.010821582,
-0.04681359,
0.016275495,
-0.0033313567,
0.027538816,
-0.019907625,
0.00040033093,
-0.030824887,
-0.058938056,
0.0014922265,
-0.027667042,
0.015573365,
-0.04173136,
-0.015453809,
-0.01595607,
0.03898053,
0.043484144,
0.0075124763,
-0.0025220348,
0.038111743,
0.041447856,
-0.011153068,
-0.01717726,
-0.045249123,
-0.010734678,
-0.03552057,
0.033035237,
-0.0077519426,
0.048082184,
-0.06981011,
0.034551185,
0.011257734,
-0.043801457,
-0.018373946,
-0.04797999,
-0.017102923,
0.0029698398,
-0.09975526,
0.00053959514,
0.0074329274,
-0.018584883,
-0.0094977375,
-0.05056549,
0.08929669,
0.011828429,
0.040005405,
-0.03369542,
0.07867971,
0.025032107,
0.016890414,
0.014425124,
0.00064274436,
0.009868133,
-0.034772366,
0.05254746,
0.071544185,
-0.01852601,
-0.0013607002,
0.010325862,
0.0647086,
0.013452749,
-0.009807788,
-0.01738053,
-0.012833702,
-0.0037767375,
-0.046967912,
0.017845146,
-0.0682881,
0.011557345,
0.01458601,
-0.048856564,
-0.01423403,
-0.03424404,
0.021640293,
-0.025939032,
-0.001273354,
0.0033471219,
0.02255794,
-0.05386608,
0.02134696,
0.012213072,
-0.027799206,
0.041816894,
0.013318655,
-0.027756989,
0.03054267,
-0.025455547,
0.014977695,
0.03629763,
0.05029929,
0.017317088,
0.0008021539,
-0.027486524,
0.0011794426,
0.021061994,
0.038059466,
0.014114616,
0.014319938,
0.012650396,
0.04102732,
0.018222608,
0.0115328785,
0.043359082,
-0.0028082337,
-0.016893078,
-0.03791571,
0.023969462,
0.0077467947,
0.033167463,
0.018768141,
0.00804635,
-0.05316497,
0.021600094,
-0.032088757,
0.056640208,
0.010592809,
-0.06282453,
-0.003963599,
-0.0054780785,
0.0057015507,
-0.026736109,
0.03140229,
0.021742998,
0.037487593,
0.04916904,
-0.015454876,
0.0036427178,
-0.06809397,
-0.005600329,
0.006426826,
0.029163402,
0.008698685,
0.013447198,
0.028116653,
-0.032959465,
-0.046715226,
0.062885955,
0.07805104,
-0.075704284,
-0.026722923,
0.031568483,
0.029869428,
0.014207811,
0.058283728,
-0.0009454238,
0.049990628,
0.09433687,
0.011483032,
0.0073822956,
0.001765557,
0.014384013,
-0.0805711,
-0.057262138,
0.0033087756,
0.017576102,
0.050261848,
-0.0058530914,
-0.00258757,
0.009722071,
0.0044941446,
0.009631424,
0.027689122,
0.012394503,
-0.04055002,
0.055514883,
-0.028808117,
0.0297643,
-0.034311485,
0.021378465,
-0.033280674,
0.019441161,
-0.009369208,
0.0030489776,
-0.016572703,
0.042294934,
0.015723946,
0.0022674324,
-0.0014906601,
0.01840701,
0.059862193,
0.053135127,
0.020754104,
-0.06374346,
0.001787633,
-0.036681958,
0.03553359,
0.06609074,
-0.0107706385,
0.045129295,
0.06838197,
0.025984539,
-0.06558362,
0.027897354,
-0.00621841,
0.03920637,
0.009362378,
-0.062093496,
0.021269219,
-0.06091154,
-0.027098468,
0.008638457,
-0.050488967,
0.04693317,
0.043328438,
-0.025587596,
0.03407469,
-0.048816204,
-0.004734613,
-0.0008902356,
0.024133636,
-0.022534605,
0.035635088,
-0.053277653,
-0.055609506,
0.0523981,
0.0014473854,
0.032570753,
-0.005762427,
-0.016173452,
-0.06672014,
0.0013724786,
0.007844828,
0.02429992,
0.0032019925,
0.0016553001,
-0.022802994,
0.001800882,
0.032480165,
-0.002195562,
-0.03154405,
-0.013679192,
-0.011184489,
0.033688888,
0.04774288,
0.0018061483,
-0.09035719,
-0.0047670994,
-0.02052915,
0.036272082,
0.020193182,
-0.036813166,
0.039460275,
-0.015967365,
-0.0033895948,
-0.031828586,
0.053221144,
0.021549668,
-0.07595095,
-0.044737455,
-0.010761814,
0.0025511624,
0.14498504,
0.08222001,
-0.037528154,
-0.032176156,
0.013683398,
0.01410672,
0.019557275,
0.062485218,
0.027925756,
0.079192385,
-0.026622739,
0.02323037,
-0.016175434,
-0.032527965,
-0.008870566,
-0.009013046,
-0.009945577,
0.025208296,
0.0073141777,
0.044331536,
-0.020921648,
-0.016868133,
-0.026842397,
0.03165012,
0.043120645,
-0.048179835,
-0.05591947,
0.029399967,
-0.069223806,
0.03508237,
0.00804212,
-0.041150257,
0.008898182,
0.0006015489,
0.023109462,
0.027766718,
0.012039964,
-0.030886615,
-0.030401329,
0.038484607,
-0.0247026,
0.0018090954,
0.028525416,
0.054761168,
-0.0062592058,
0.029739086,
0.033199638,
0.0488184,
0.028191078,
-0.020734766,
0.00060847827,
0.029920708,
-0.0490555,
0.007290553,
0.0026984178,
0.063341014,
0.018249765,
0.019682994,
0.0063302247,
-0.029094942,
-0.030193835,
0.042414594,
-0.05859321,
-0.09094711,
-0.025345713,
-0.034041878,
-0.014829038,
0.0030920506,
0.015670862,
0.073476,
0.017715238,
0.052982714,
0.012198469,
-0.021962965,
0.017349334,
0.025136312,
0.006353252,
0.03436416,
-0.01633907,
-0.08311436,
0.04788054,
0.0032672018,
-0.0318856,
0.06784985,
0.072452076,
0.009116457,
0.017004106,
-0.040795434,
-0.023130566,
-0.0017866351,
-0.020753238,
-0.028738804,
0.0031001552,
-0.012533389,
0.047431413,
-0.059432007,
-0.019904893,
0.009464013,
-0.016388606,
0.028543858,
-0.026128467,
-0.03368374,
-0.0040021804,
3.1505784e-05,
-0.10005339,
0.020524276,
-0.06320255,
-0.026909621,
-0.009929203,
0.03084924,
-0.041759893,
0.02034976,
-0.008311877,
-0.0042031757,
-0.04709363,
0.030620687,
-0.028947143,
-0.007556809,
0.01617724,
0.037857477,
-0.039480377,
-0.008805032,
0.051410846,
0.017079966,
0.0032464731,
0.023022559,
-0.017350538,
0.03471975,
-0.02863222,
-0.024592673,
-0.0077179587,
0.03141146,
0.03583118,
-0.0130302245,
-0.057425633,
0.040003538,
-0.0046423534,
0.019725544,
0.0397109,
-0.0025461344,
0.046675395,
0.011516851,
-0.029444098,
0.03419632,
-0.043872464,
-0.021072017,
-0.010389852,
0.01248914,
-0.03476949,
0.02083105,
-0.021170666,
-0.010824939,
-0.034223318,
0.0008804664,
-0.00975538,
-0.004145119,
0.0062736045,
0.017810361,
-0.05057402,
0.0028876425,
-0.012459405,
0.024415256,
-0.009684934,
-0.032268245,
-1.0135974e-05,
0.015377202,
-0.008089165,
-0.08534785,
0.011209079,
-0.006432232,
-0.05970185,
0.03646468,
-0.024002092,
-0.022855703,
-0.051673587,
0.038473092,
-0.028756764,
0.041329525,
-0.06377881,
-0.014500157,
-0.018372798,
-0.008677442,
0.036858637,
0.038448237,
0.044321943,
-0.046770208,
0.026638264,
-0.04069364,
-0.051563717,
-0.054425545,
-0.007966239,
-0.045169767,
-0.0006122694,
0.013411372,
0.04263278,
0.03749003,
0.010722818,
-0.041889716,
-0.036726084,
0.014166507,
0.038341004,
0.004509263,
0.035988707,
0.02634235,
-0.02256134,
0.08171513,
0.09104147,
0.06757358,
-0.0016213343,
-0.018941583,
-0.0014519675,
0.03409365,
-0.060576558,
-0.028001321,
-0.08352477,
0.011629786,
0.014637305,
-0.021191692,
0.009192876,
0.0025693115,
0.03831378,
-0.00035758872,
-0.032391928,
0.006118005,
-0.05773841,
0.033030152,
0.03268179,
0.031052263,
-0.0018795256,
-0.0463158,
0.017675944,
0.039604764,
0.056545958,
-0.002072885,
-0.0374349,
-0.014934615,
-0.046360567,
0.060439337,
-5.3795357e-05,
0.027416907,
-0.08041611,
0.00016825287,
-0.08668716,
-0.03210328,
0.016515074,
-0.0062253834,
-0.00093463395,
-0.027180947,
-0.049670145,
-0.033094753,
-0.0051170597,
0.031569846,
-0.014995825,
-0.016850019,
0.04239559,
0.020676404,
0.0319851,
-0.008854607,
0.04452473,
-0.023021534,
0.007295005,
0.05227394,
0.0040576537,
-0.0655794,
-0.067981854,
0.03440047,
0.009278226,
-0.0282169,
0.060756575,
-0.020904224,
0.01505642,
-0.0045534745,
0.018723203,
0.0035253377,
0.011872832,
0.042355374,
0.017724074,
-0.060881015,
0.010464869,
-0.015852634,
-0.03751531,
0.022855468,
-0.037866883,
0.05328077,
-0.0320521,
-0.030350016,
-0.010912554,
-0.012704745,
0.0076516517,
0.0014142905,
0.011725254,
0.0067488,
-0.008221275,
0.01648301,
-0.013712469,
0.0129476935,
0.028405288,
-0.011489568,
-0.006695754,
-0.07523671,
0.0012562524,
-0.051538818,
0.017514601,
0.03280067,
-0.018965578,
0.009017527,
-0.052108284,
0.0017074383,
0.00056099903,
0.008343997,
-0.01674154,
-0.012425597,
-0.00041037227,
0.1104793,
-0.015096156,
0.014880369,
-0.0098567465,
0.024937985,
0.0112802675,
-0.0010737488,
-0.06354736,
-3.862344e-05,
-0.024247888
],
"index": 0,
"object": "embedding"
}
],
"model": "nomic-embed-text:latest",
"object": "list",
"usage": {
"prompt_tokens": 8,
"total_tokens": 8
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,807 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "nomic-embed-text:latest",
"input": [
"This is batch test file 1"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "nomic-embed-text:latest"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.01183041,
-0.0065989625,
-0.159677,
0.011660306,
0.055617318,
-0.03764695,
0.0163666,
0.033777084,
-0.06433634,
-0.08037781,
-0.0057114926,
0.07607082,
0.033490222,
0.048497792,
-0.048456103,
-0.049539,
0.059783153,
-0.08439946,
0.0076269372,
-0.0128732305,
0.05902644,
0.012931591,
-0.08323305,
-0.00037215627,
0.13830419,
0.024290211,
-0.047809705,
0.039520696,
-0.06423598,
-0.01653946,
0.03764018,
-0.001062501,
0.028489634,
-0.025925444,
-0.015699588,
-0.012715725,
0.017358005,
-0.007198467,
0.059812553,
0.028332362,
-0.00015984774,
0.004483949,
0.034580402,
-0.054280724,
-0.002989754,
0.023461882,
0.011839507,
0.018908013,
0.016710319,
0.004905327,
-0.0107955905,
-0.01565778,
-0.04169478,
0.02510759,
0.026486792,
0.01054831,
0.011289881,
0.038714606,
-0.0136384675,
-0.023249293,
0.014086617,
0.018654121,
-0.07146624,
0.047506154,
-0.012085512,
-0.007589288,
-0.009515838,
0.0048574316,
-0.004600554,
0.0031499087,
0.06778753,
-0.019641325,
0.018102348,
-0.01726182,
-0.003802732,
-0.04414122,
-0.010491107,
-0.065158285,
-0.045328394,
-0.0019480857,
0.052318677,
0.0386049,
0.020296056,
0.044793047,
0.08282699,
-0.019911101,
-0.016511027,
-0.0062321154,
-0.025036003,
0.04578435,
0.0019149093,
0.025694296,
-0.0042011673,
-0.018107908,
-0.026668591,
0.018340195,
0.010810087,
0.018672433,
-0.006774911,
-0.0026458725,
0.023082372,
0.027705511,
0.019753877,
-0.03543464,
-0.0061461334,
0.0155549655,
-0.019579103,
-0.00693201,
-0.06635246,
-0.015482261,
-0.0040295934,
0.0006957319,
-0.008871345,
-0.00842857,
0.031484608,
-0.010076284,
0.06257018,
0.0012318427,
-0.024530765,
0.00015912329,
0.0033331378,
-0.032083686,
-0.007399188,
0.07031288,
0.033552274,
0.061820872,
-0.09171231,
0.036374647,
0.007984676,
-0.031679634,
0.00598418,
-0.0029291043,
-0.0049730917,
-0.052057285,
0.020125173,
0.009004486,
-0.022456508,
-0.012051283,
-0.03740793,
-0.027594674,
-0.02012376,
0.011664398,
0.04336321,
0.061720803,
0.041055538,
-0.02444171,
0.024476659,
0.030615946,
-0.01689858,
0.0091607245,
0.0038629547,
-0.0019203863,
-0.0035829302,
0.021674454,
0.037874587,
-0.057554636,
0.014823112,
0.0036189007,
0.012866306,
0.051631145,
0.0021970836,
-0.033981066,
-0.03782387,
0.01235394,
-0.057634324,
-0.07556398,
0.008977255,
0.07841102,
0.060794022,
-0.03463157,
-0.063551195,
-0.064811006,
0.010709957,
-0.027145889,
-0.0837886,
-0.035913587,
0.017231362,
-0.01455278,
0.039031487,
-0.038145658,
0.023733672,
-0.019787688,
0.020164428,
0.023367887,
0.0035691075,
-0.028722964,
0.014704597,
-0.019744202,
-0.06668101,
-0.017812628,
-0.009186517,
-0.033119973,
-0.085967295,
-0.080312125,
0.013302178,
-0.061551016,
0.017130975,
-0.017442413,
0.04742156,
-0.013023663,
-0.013847287,
-0.01880652,
-0.07011646,
0.018233122,
-0.030537246,
-0.026766777,
-0.012263141,
0.014689888,
-0.049961388,
0.03201573,
0.015774516,
-0.020335846,
-0.038940914,
0.0065977564,
0.035997562,
-0.053227507,
0.008883548,
-0.039375745,
-0.017865263,
0.007343183,
0.017375462,
0.021595728,
0.057712954,
-0.040693924,
-0.000778912,
-0.018082067,
-0.015103824,
-0.024191063,
-0.0077742958,
-0.034330968,
-0.020159615,
-0.03245423,
0.0020437704,
-0.000114842755,
-0.029564297,
-0.018030599,
-0.0031425157,
0.053831782,
-0.026106073,
0.04243461,
-0.048363626,
0.025711408,
-0.008338205,
0.0009197218,
-0.011072695,
0.00031293565,
0.0033421176,
-0.007302082,
0.04127773,
-0.0074836435,
-0.04299338,
-0.002760089,
0.019094143,
0.039009947,
0.03581834,
-0.032022007,
-0.009045915,
-0.03275861,
0.017295409,
-0.039618656,
0.015396318,
-0.07593323,
0.03475173,
0.007710904,
-0.009037294,
-0.026630195,
-0.027383188,
0.02212514,
-0.035001624,
-0.0219445,
-0.01212384,
-0.0018017493,
-0.011781174,
-0.051410057,
0.026306989,
0.006329408,
0.010307703,
0.01613663,
-0.006002573,
0.031006144,
-0.036049806,
-0.018159281,
-0.012575659,
-0.0048318235,
0.048996273,
-0.0010814993,
0.050774954,
-0.027395276,
0.0115728015,
0.031056559,
0.011177566,
0.012006755,
-0.02556873,
0.029484332,
-0.009657058,
0.009322593,
0.022122696,
-0.018415872,
0.010098681,
-0.007367993,
-0.023805562,
0.035959154,
0.028602934,
0.030718775,
0.01705538,
-0.024984695,
0.042858277,
-0.015449,
0.005040281,
0.038991883,
-0.07141338,
-0.002947093,
-0.044420503,
0.019382862,
-0.040407836,
0.04245461,
0.048940845,
0.018063093,
0.08591597,
-0.035389014,
-0.010674617,
-0.103511095,
-0.008537786,
0.010264984,
-0.003966177,
0.02314327,
0.0048719845,
0.06199085,
-0.00810136,
-0.039515182,
0.05785542,
0.06719427,
-0.039108012,
-0.050833326,
0.05823837,
0.017042343,
0.005815163,
0.039831843,
-0.012049576,
0.076485425,
0.012621482,
0.06927575,
0.05359866,
-0.015146923,
0.044284295,
-0.062355984,
-0.009034613,
0.04071826,
-0.01236521,
0.079400524,
0.0017920422,
-0.011480363,
0.008711773,
0.018180674,
-0.0030674522,
0.0326583,
0.03525443,
-0.02087537,
0.05094025,
-0.0037492628,
0.009178962,
-0.0050435406,
-0.01166052,
0.0060158456,
-0.002493798,
0.021641793,
0.0019783853,
0.023140313,
0.046997964,
0.0069999313,
-0.0552795,
-0.020092534,
0.06467227,
0.044829298,
0.013295184,
0.0377816,
-0.046331275,
0.01770082,
-0.013348137,
0.04617519,
0.04468347,
-0.03253012,
0.015447477,
0.030224748,
-0.0013485672,
-0.03615717,
0.008698818,
-0.0037734164,
0.04494809,
0.037184346,
-0.011223347,
0.0046344185,
-0.07529732,
0.025554653,
-0.015140733,
-0.0035430966,
0.03661124,
0.013250649,
-0.055586766,
0.027562145,
-0.018204745,
-0.029428158,
-0.0029150618,
0.03623637,
-0.022476854,
-0.0058649112,
-0.015735915,
-0.019995706,
0.032269973,
0.017872665,
0.028031865,
-0.043758772,
-0.027188994,
-0.058870632,
0.024894219,
0.015318543,
0.06244725,
0.021922529,
0.000678521,
-0.025339983,
0.025911404,
0.01583583,
-0.014407775,
-0.037194725,
-0.015699212,
0.008184332,
0.014927899,
0.0737949,
0.007748195,
-0.07158831,
-0.039901625,
0.031431172,
0.011147712,
0.020828275,
-0.035193726,
0.05613746,
-0.0022006142,
0.008007006,
0.001472366,
0.019893395,
0.044233263,
-0.02244468,
-0.0665883,
0.013832251,
0.0026457622,
0.09737926,
0.09575702,
-0.04908296,
-0.062802345,
-0.0095988205,
0.008329187,
0.041316554,
-0.0222064,
0.02813126,
0.07059441,
-0.02560012,
0.044651207,
-0.027545268,
-0.007889025,
0.03391235,
0.008170332,
0.0067786956,
0.0615806,
0.044006567,
0.0056231483,
-0.024909342,
0.040038925,
-0.037021257,
0.0010181392,
0.058034208,
-0.021651162,
-0.06021004,
0.014830516,
-0.050770685,
0.010422301,
0.0016205559,
-0.03166853,
0.014091049,
-0.002066098,
0.02992549,
0.013251145,
0.011673487,
-0.0430954,
-0.048110887,
0.01493126,
0.006862025,
0.04188833,
0.011692501,
0.0465231,
0.010624,
0.02873104,
0.037793215,
0.08978305,
0.011727344,
0.043248493,
-0.033803374,
0.011249601,
-0.015437648,
-0.009372223,
-0.005403984,
-0.009915787,
-0.030847883,
-0.00076942804,
0.018497106,
-0.00030310496,
-0.0076847905,
-0.0036222623,
-0.008554024,
-0.07606582,
-0.024716768,
-0.028077196,
-0.024249833,
0.027158285,
0.0075863106,
0.09348848,
-0.00034073484,
0.039915837,
-0.007647916,
-0.035295825,
0.01611119,
0.060429912,
0.009458672,
0.027763832,
-0.025683967,
-0.091306895,
0.0367077,
0.009893541,
-5.195292e-05,
0.045459133,
0.04671114,
-0.0023683042,
0.017460158,
-0.007978136,
0.00081788,
-0.009908127,
0.0049076737,
-0.03604046,
0.024152907,
0.0022956813,
0.061990347,
-0.061900347,
0.0047628507,
0.007954329,
-0.05227117,
0.013897867,
-0.0034024485,
-0.06788996,
0.036198605,
-0.014600589,
-0.038748026,
0.031534728,
-0.037783317,
-0.057816587,
-0.054505207,
0.010229355,
-0.01668772,
-0.013999046,
-0.049303915,
-0.013006012,
-0.020143948,
0.0009209327,
0.010504151,
0.052313875,
-0.003835063,
0.03984861,
-0.05403,
0.004036369,
0.035671517,
-0.009310839,
0.01921996,
0.015426655,
-0.042717084,
-0.016548151,
-0.03559785,
-0.03052737,
0.0016032697,
0.04009937,
0.05516244,
-0.009645057,
-0.019377265,
0.017122837,
0.007185355,
0.012066883,
0.015954316,
-0.0029309995,
-0.008670052,
0.0007600626,
-0.0019616315,
0.03605449,
-0.028704248,
-0.057372347,
-0.03711266,
0.02601168,
0.020637576,
-0.014288832,
0.023694387,
-0.018556923,
-0.003977263,
-0.03251488,
-0.04545843,
-0.027434839,
0.013158248,
-0.005281848,
-0.03187363,
-0.022890532,
-0.0063330783,
0.040277284,
0.017638152,
-0.038472284,
0.015346814,
0.06673371,
-0.011651253,
-0.06683331,
0.008377879,
-0.030951817,
-0.036013808,
0.02394849,
0.023321355,
0.024521058,
-0.03078664,
0.014595395,
-0.037766363,
0.075227626,
-0.01933975,
0.043791853,
-0.025162384,
-0.044860955,
0.0059519857,
0.04085485,
0.06551164,
-0.05282273,
0.0030225238,
-0.06850771,
-0.062015526,
-0.06011264,
0.014174797,
-0.050894123,
0.017077608,
0.021088008,
0.058029104,
0.043224387,
-0.004394573,
-0.0022478225,
-0.006972821,
0.02401093,
0.022611097,
8.550083e-05,
0.056450296,
0.055112243,
-0.034522895,
0.06482398,
0.08114595,
0.022528961,
-0.013464262,
-0.0029874062,
0.005515398,
0.026176685,
-0.041392956,
-0.035894908,
-0.052102275,
0.032556653,
-0.016931413,
-0.047386043,
0.012574915,
0.03802867,
0.045309085,
0.025728,
-0.02505067,
0.039530423,
-0.065004446,
0.017083768,
0.0033854055,
0.07688453,
-0.019878633,
-0.0025184979,
-0.0027949202,
0.052868426,
0.054179598,
-0.0040608337,
-0.0053128796,
-0.04103081,
-0.049691968,
0.06014039,
0.04751648,
0.015087763,
-0.06859484,
0.00805693,
-0.061754886,
0.008819008,
-0.027785089,
-0.010586925,
-0.020496469,
-0.029158294,
-0.05417346,
-0.029509347,
-0.025456924,
0.041056376,
0.0075264946,
-0.018885529,
0.07735419,
0.00489195,
0.050696895,
-0.0041886116,
0.064080104,
-0.020775754,
-0.017177466,
0.0023288913,
0.010398866,
-0.040627487,
-0.034321204,
0.016019996,
0.028118521,
0.014172112,
0.08738979,
-0.03657629,
0.018347794,
-0.03947416,
-0.01077611,
0.00085160177,
0.0368259,
0.05611389,
0.05134766,
-0.025541335,
-0.0057555106,
-0.013793745,
-0.05975066,
-0.0064870752,
-0.053716175,
0.024583345,
-0.084030546,
-0.048775505,
-0.059886374,
-0.057641674,
0.030222055,
0.018706435,
0.023170326,
-0.0064046904,
-0.018711446,
-0.0029956547,
0.0377868,
-0.012569718,
0.0514249,
-0.012415474,
-0.018657023,
-0.040379863,
0.029388199,
-0.07378978,
0.026212148,
0.0056296797,
-0.00405927,
0.021354636,
-0.0822599,
0.01597725,
0.07648158,
-0.006006045,
-0.014829594,
-0.021541826,
0.0032610476,
0.06906917,
-0.05802312,
-0.023113884,
-0.015534724,
0.016758824,
0.0030736707,
-0.0022294512,
-0.026804008,
-0.0031566115,
-0.0584943
],
"index": 0,
"object": "embedding"
}
],
"model": "nomic-embed-text:latest",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,423 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is the content of test file 1"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.029406669,
0.08920982,
-0.11326726,
0.0065823817,
0.07725067,
-0.036890104,
0.030436223,
0.041454185,
-0.049156666,
0.018258564,
0.14662577,
0.01744915,
-0.012837422,
-0.06889876,
-0.039401636,
-0.038800705,
-0.08963421,
-0.059656583,
0.001375945,
0.045138627,
0.042796962,
0.053700265,
-0.035706885,
0.010138017,
0.060920056,
0.017344126,
-0.05633907,
0.063370295,
0.0021257724,
-0.083796844,
0.050487563,
0.047987595,
0.069071226,
0.049588464,
0.117036626,
0.05339311,
0.10129953,
-0.048230153,
-0.014987975,
0.0250915,
0.031392053,
-0.008863942,
0.0073650074,
-0.0009767569,
-0.016403567,
0.015523393,
-0.010998956,
-0.014870063,
0.0061682137,
-0.0017961137,
-0.022682818,
0.018210242,
-0.07757007,
-0.0015845516,
0.069547005,
0.000419109,
0.038414054,
0.005823485,
-0.028931383,
0.07009549,
-0.0018009909,
0.033516172,
-0.014593847,
0.03922457,
0.08240545,
-0.050596908,
-0.039732855,
-0.024425076,
-0.015055329,
-0.11705068,
-0.15979129,
-0.008256823,
-0.0100719705,
0.03266482,
0.0029998205,
0.0316428,
-0.094554916,
0.017661797,
0.058996264,
-0.119718134,
-0.027414676,
-0.09155906,
0.040038,
0.01091849,
-0.029446004,
0.10225186,
0.06583262,
-0.003439552,
-0.009694834,
0.016906522,
0.023685955,
-0.032616187,
-0.010238839,
0.07891618,
-0.007330681,
0.05238444,
0.00943625,
0.042121,
0.08491511,
0.049208272,
-0.01868227,
-0.013585418,
0.06727199,
0.084571496,
-0.103213035,
-0.08387524,
0.03641842,
-0.047227863,
0.057315867,
-0.04463932,
0.006783099,
-0.08934107,
-0.015040418,
-0.08107057,
0.013285569,
-0.060907867,
-0.042128306,
0.057306163,
-0.058711898,
0.04628304,
0.070194095,
-0.041729517,
-0.0338408,
-0.012369257,
-0.044708908,
-0.059450094,
0.08251312,
-3.443368e-33,
0.0121309515,
-0.11084454,
-0.020510655,
0.10916455,
0.033683147,
-0.02845083,
0.024345158,
0.034192592,
-0.08367815,
0.0064610844,
-0.00912456,
-0.0663567,
-0.0028754657,
0.008272698,
-0.09166764,
0.0089771375,
-0.03963948,
0.019947624,
-0.01321528,
-0.019034218,
0.051933073,
0.028107261,
-0.039153125,
-0.080395184,
-0.050503474,
0.02060341,
-0.012718284,
-0.046732575,
0.017907938,
-0.0028334607,
-0.011695137,
-0.05667005,
-0.043894444,
0.034919597,
0.022352098,
0.046777196,
0.045085873,
-0.008840106,
-0.06373453,
0.036720857,
0.012829601,
-0.035169926,
0.046209145,
-0.014361767,
0.03706697,
-0.056797564,
-0.06310496,
0.010818958,
0.047810175,
0.0029118094,
-0.003235893,
0.061511047,
0.072056666,
-0.03286638,
0.005070082,
0.021947902,
-0.017779002,
-0.022738373,
-0.021926457,
0.047074158,
0.010847615,
0.05539702,
-0.07119971,
0.033833236,
0.012342855,
-0.047586687,
-0.026776271,
-0.09885727,
0.10053448,
0.036877092,
-0.07049897,
-0.059692938,
0.016129492,
-0.0016443401,
-0.026804024,
-0.013527272,
-0.015385511,
0.055627547,
-0.060485132,
-0.055540122,
-0.04329072,
-0.07097361,
-0.04857043,
-0.03726256,
-0.09059366,
-0.036855534,
0.024561211,
-0.10113953,
0.056738112,
-0.10995085,
0.042282794,
0.014222368,
-0.07067843,
-0.05902307,
0.06426122,
1.6036318e-33,
0.037851896,
0.032911286,
-0.04029648,
-0.00049357174,
0.028011942,
0.048672136,
0.07279598,
-0.027471887,
-0.02847654,
0.114492,
0.001777095,
-0.009519909,
0.0025862327,
-0.056408145,
0.023462169,
-0.006209674,
-0.010567065,
-0.05877587,
-0.032393616,
0.011836781,
-0.038905054,
0.05516299,
0.09564333,
0.028543225,
-0.023832332,
-0.0015711841,
0.047049087,
0.03128219,
0.02811091,
0.007177092,
0.055283513,
0.06574452,
-0.1020208,
0.021213628,
0.020237882,
-0.10449357,
0.09608935,
-0.06253181,
0.015293753,
0.042053986,
0.06105009,
0.0909162,
0.018404186,
0.031023262,
0.03562763,
0.112073965,
0.10124763,
-0.007683015,
0.013140281,
-0.042280227,
0.051135287,
-0.02950743,
0.027794402,
-0.010734668,
-0.011067552,
0.058104575,
-0.009284788,
0.056184508,
-0.040822964,
0.010282754,
0.0374409,
0.054198533,
-0.061418086,
0.030569963,
0.0023648597,
-0.054184474,
-0.020570045,
0.012422129,
0.025696559,
-0.007607385,
-0.026194826,
-0.024159024,
0.0012979766,
-0.07461716,
0.051458035,
-0.004183808,
-0.040804464,
-0.023975441,
0.009455526,
-0.0018798193,
0.03668693,
-0.019319497,
-0.06195781,
0.06456675,
0.040328216,
-0.010790134,
0.013190221,
0.09067539,
-0.0051480443,
0.013312647,
-0.029548675,
0.07769003,
0.0027328292,
0.04533781,
-0.0017606319,
-1.661594e-08,
-0.040610366,
-0.09883059,
-0.05522113,
-0.02916469,
-0.019305382,
0.088138185,
-0.038325552,
-0.03327639,
-0.012629364,
0.006948921,
0.010438818,
0.026771523,
-0.040855426,
-0.03958403,
-0.051137064,
-0.016159322,
-0.020525131,
-0.023726366,
-0.013322245,
-0.008097836,
0.028000915,
0.02806969,
0.015645925,
-0.0043166955,
0.0054488196,
0.06720413,
0.068473674,
0.07172716,
-0.06339439,
-0.02540609,
0.08468492,
0.041936778,
0.021067144,
-0.07596481,
0.017143335,
0.1260291,
0.121315174,
0.08431059,
0.040587336,
0.036687315,
-0.04717,
-0.022659328,
-0.006820436,
0.005210712,
-0.033785924,
-0.08449115,
-0.0844501,
-0.03192747,
-0.036649443,
-0.13791409,
-0.036417518,
-0.00080547476,
-0.047578912,
0.038795993,
-0.06757743,
0.016941966,
0.036312684,
0.0125779435,
-0.058240637,
0.004471269,
0.03226526,
0.09821741,
0.053010236,
-0.016268
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 8,
"total_tokens": 8
}
}
},
"is_streaming": false
}
}

@ -0,0 +1,423 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is batch test file 1"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.009745733,
0.03363038,
-0.10852256,
0.026609829,
-0.0060599064,
-0.020473678,
0.0692486,
0.032276765,
-0.11532835,
-0.0005207133,
0.11814916,
0.0119809555,
0.03685765,
-0.10744223,
-0.046515625,
0.0015449532,
-0.06319664,
-0.04640812,
-0.037318822,
-0.025718328,
-0.00026058854,
-0.011890766,
-0.050925612,
0.014111713,
0.029467698,
0.006379121,
-0.012013293,
-0.0024293982,
-0.044318773,
-0.08100101,
0.02009568,
0.055713937,
0.078816675,
0.054973654,
0.20367871,
-0.004309458,
0.03877001,
0.03825522,
-0.002538199,
-0.0007973801,
0.044761047,
-0.054529082,
-0.008856888,
-0.04078078,
0.011367262,
-0.022404457,
-0.06209053,
0.02558725,
-0.0034454092,
-0.03743928,
-0.062026348,
-0.030812219,
-0.034592565,
-0.014926672,
0.018588377,
0.013435887,
0.08169151,
0.053658403,
-0.03557856,
0.033325985,
-0.01637577,
-0.0222152,
-0.039247517,
0.00094368146,
0.10228945,
-0.04305617,
-0.052200828,
-0.02007385,
0.054805383,
-0.08231377,
-0.14736547,
0.048954617,
-0.0212168,
0.02872658,
-0.0671409,
0.021436114,
-0.023599947,
0.03677982,
0.010577411,
-0.0966004,
-0.06367233,
-0.10277648,
0.0273993,
-0.06292906,
-0.046344172,
0.039919835,
0.02682899,
0.025460077,
-0.013083559,
-0.002667712,
-0.016529463,
0.012605053,
-0.0064383023,
0.015841383,
-0.01710707,
0.12320292,
-0.0077660284,
0.05845043,
0.07362552,
0.038426086,
0.004742023,
-0.0155985365,
0.01418979,
0.07865995,
-0.026352523,
-0.037174653,
0.06787817,
-0.060126718,
0.06111402,
-0.034931272,
-0.009446326,
-0.006150886,
0.02892313,
-0.09361577,
0.0335364,
-0.09088912,
0.009241144,
0.07092964,
-0.08954648,
0.04494549,
0.040462427,
-0.04167353,
0.0076030386,
-0.0066417656,
-0.07275736,
-0.043690544,
0.07685007,
-1.0508795e-33,
-0.019583685,
-0.13087204,
-0.03574564,
0.070223756,
0.08133056,
-0.009436003,
0.046778366,
0.03478148,
-0.09441185,
-0.040857755,
-0.02127058,
-0.106959894,
0.024023255,
0.022780996,
-0.09042505,
-0.035755932,
0.011359196,
0.050059184,
0.0050815986,
-0.07676938,
0.05453651,
0.04191775,
-0.009206564,
-0.022437057,
-0.04617258,
-0.038608693,
-0.00036489012,
-0.025092375,
0.039146807,
-0.0072839926,
0.03675482,
-0.011301064,
-0.08863303,
0.059421506,
0.015851071,
0.033407707,
0.056883834,
-0.01203776,
0.027333334,
-0.009560535,
-0.05030555,
-0.009787559,
0.023205005,
-0.007937716,
0.003991047,
-0.036422852,
-0.06979188,
0.046075627,
0.056377746,
0.0071927872,
-0.00020658698,
0.017678235,
0.023745935,
-0.0031295705,
0.016370842,
0.027585855,
-0.03440131,
-0.05594279,
0.036442764,
0.03577988,
-0.005324585,
0.015240975,
-0.09071462,
0.072764605,
0.02343818,
-0.093097225,
0.05842133,
-0.061913762,
0.045556016,
0.07639311,
-0.035199754,
-0.009256856,
0.038682748,
-0.040795818,
0.017686425,
-0.025513103,
0.06860537,
0.085520275,
-0.1023457,
-0.0036474275,
-0.014826131,
-0.05045756,
-0.09065474,
-0.076476775,
-0.008538021,
-0.04111943,
-0.035473913,
-0.061549038,
0.114327826,
-0.09601482,
0.022990143,
0.0022396755,
-0.023026146,
-0.028128328,
0.07969127,
-4.1765383e-34,
0.07866384,
0.11484068,
0.016687382,
0.009315677,
0.01664128,
0.024303248,
0.046507504,
-0.043804675,
-0.09136995,
0.106353745,
-0.06948852,
0.018747667,
0.0053492193,
-0.033229355,
0.042339083,
-0.0017468681,
0.05323157,
0.0058223205,
-0.05331342,
0.016506517,
-0.02325185,
0.097519755,
-0.0045558517,
0.08866843,
-0.028221445,
-0.012007969,
-0.009742725,
0.061458003,
0.01574456,
-0.00039456616,
0.02444834,
0.065891184,
-0.054779086,
0.04863689,
0.043890025,
-0.062467597,
0.07615393,
0.0067509366,
0.019150084,
0.06994535,
0.027900916,
0.08902746,
-0.027433047,
0.031390887,
0.02271287,
0.08119532,
0.06855678,
0.0023552915,
-0.06764184,
0.00704173,
-0.034521427,
-0.053785548,
-0.03075216,
0.007947864,
-0.025317406,
-0.040664013,
0.036144093,
0.017730465,
-0.040179063,
0.013665757,
0.004815376,
0.009095556,
0.0072483593,
0.012753351,
-0.047865536,
-0.046072423,
-0.014048283,
0.031082962,
-0.034945205,
-0.023550391,
0.033062257,
-0.022966444,
0.007744228,
0.015939556,
-0.0012224894,
0.0010534802,
-0.015109,
-0.021597888,
-0.029862719,
0.03983828,
0.062536344,
0.0106168175,
-0.027220478,
0.02410377,
-0.0023566757,
0.085310005,
0.04843323,
0.090823516,
0.005126319,
0.020297319,
-0.01739127,
0.047677357,
0.11080086,
0.030030197,
0.029773563,
-1.5454503e-08,
-0.03580758,
-0.12177604,
0.019753791,
0.05854353,
-0.01590761,
0.085781366,
-0.09558486,
-0.0016744126,
0.00773199,
-0.04790156,
0.01175936,
0.006536077,
-0.032027386,
0.0031026274,
-0.07580574,
-0.039700802,
-0.00170645,
-0.070955865,
0.043680355,
0.029966798,
0.0039943648,
0.031923376,
0.08119928,
0.038820695,
0.013302812,
0.041675337,
0.044349737,
0.060403902,
-0.1058191,
-0.05287386,
0.050275758,
0.039101604,
0.0599918,
-0.025067834,
-0.019554066,
0.06748813,
0.12508559,
0.059007537,
-0.019899847,
-0.030194808,
-0.046559453,
0.034567222,
-0.021644907,
-0.03327634,
-0.0075667608,
-0.100658834,
-0.0639619,
-0.055270903,
-0.0111757815,
-0.11671873,
-0.07208087,
0.023208033,
0.027215267,
0.063635156,
-0.05858023,
0.020345282,
0.018325811,
-0.0036095325,
0.006916675,
0.06541716,
0.009575581,
0.046839867,
0.0070611075,
-0.09470841
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

@ -0,0 +1,807 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "nomic-embed-text:latest",
"input": [
"This is the content of test file 1"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "nomic-embed-text:latest"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.011639302,
0.015066345,
-0.1572681,
-0.044965014,
0.045302268,
0.012036585,
0.036542512,
0.005725059,
-0.052068613,
-0.023581833,
-0.0041714185,
0.047297083,
0.0044818125,
0.0073796143,
-0.06833552,
-0.020871542,
0.030256433,
-0.052156504,
-0.016426736,
-0.045092978,
-0.012395779,
-0.017792987,
-0.08013035,
-0.025271736,
0.110538565,
0.019197728,
-0.05617586,
0.045342237,
-0.100888394,
-0.015532925,
0.04541118,
-0.048470836,
0.014533936,
-0.04054472,
0.004343861,
-0.03328387,
0.038874496,
0.034725353,
0.022646122,
0.024648184,
-0.02911675,
-0.0140342,
-0.010215055,
-0.00092316914,
0.015458233,
0.0042022346,
-0.0118241655,
0.031950384,
0.021818206,
-0.0009401939,
0.0028767833,
0.022676043,
-0.027578428,
0.017072845,
0.055942602,
0.008372957,
-8.234923e-05,
-0.0076934453,
0.017103186,
-0.07049976,
0.0470288,
0.030520689,
-0.0853248,
0.031003723,
0.005461848,
-0.039933346,
-0.028195756,
0.02583814,
-0.020038705,
0.027421296,
0.09211795,
-0.0021492639,
0.009114191,
-0.02858135,
-0.0130490055,
-0.047928475,
0.021908045,
-0.03816779,
-0.040175024,
-0.008988226,
0.062123742,
0.032331105,
0.013500545,
0.014699184,
0.06949165,
-0.029347114,
-0.036963575,
-0.025804758,
0.006973289,
0.03219541,
0.014725156,
0.032485504,
0.025228832,
0.015978005,
-0.0036463195,
0.009395636,
-0.0030804265,
0.026493264,
-0.01026861,
0.0003747859,
0.017421532,
0.015864568,
0.0254427,
-0.021840125,
0.006622214,
0.018735437,
-0.008662971,
8.567802e-05,
-0.08026379,
-0.003987451,
-0.0022324976,
0.030920457,
-0.03272228,
-0.025135856,
0.015818166,
-0.030539474,
0.084593095,
-0.0072662015,
-0.04306349,
0.016708883,
-0.02148629,
-0.021512741,
0.011571002,
0.07055689,
0.016873637,
0.017103009,
-0.058425475,
0.009254332,
0.018121762,
-0.029209172,
-0.017481297,
0.005129311,
-1.4240719e-05,
-0.02815651,
-0.012156167,
-0.011126637,
0.012530035,
0.017916953,
-0.049299978,
-0.023406321,
-0.025908781,
0.01868743,
0.013128073,
0.030577261,
0.03492911,
-0.026720364,
0.044888426,
0.015100583,
-0.02517811,
-0.018026695,
0.04455666,
0.006026339,
0.006132853,
0.013067113,
0.013630368,
-0.06992026,
0.015714098,
-0.025995858,
0.00764349,
0.06502328,
0.00921131,
0.00039559926,
-0.014907944,
0.033250615,
-0.022297438,
-0.022631606,
-0.009259513,
0.07459313,
0.07961807,
-0.04546293,
-0.04984229,
-0.056986727,
-0.021624641,
-0.009604434,
-0.050308105,
-0.029882062,
0.02890167,
-0.016925206,
0.03357617,
-0.013084858,
0.032628123,
0.004407517,
0.028667213,
0.02581998,
-0.008354794,
-0.045190092,
0.017996402,
-0.021489577,
-0.049504388,
0.003702337,
-0.023653682,
-0.007418799,
-0.09230719,
-0.0666417,
0.01368294,
-0.07626095,
0.016283033,
-0.025274273,
0.046640623,
-0.03306251,
-0.019547738,
-0.02728644,
-0.038707435,
-0.0075380616,
-0.03706684,
-0.014038333,
-0.019394161,
-0.010599687,
-0.0057279305,
0.01753179,
0.037147418,
-0.01409748,
-0.028078519,
0.028943742,
0.044126343,
-0.024797611,
-0.02061766,
-0.041145287,
-0.0016994113,
-0.035794605,
0.022767134,
0.007715133,
0.033083446,
-0.06898011,
0.0077070463,
0.0039297407,
-0.038172692,
-0.032068398,
-0.043679804,
-0.0292851,
-0.020715753,
-0.05462352,
0.011206036,
0.020920858,
-0.007133438,
-0.006820509,
-0.016809242,
0.06488191,
-0.0150427865,
0.040075593,
-0.047243405,
0.05071197,
0.015879754,
-0.0006090825,
0.0067252424,
0.0052318904,
0.0038148144,
-0.032034587,
0.032176103,
0.040441014,
-0.03223476,
0.0034279015,
0.04811163,
0.058563426,
0.025335358,
-0.03077014,
-0.0060142917,
-0.025248509,
0.024592392,
-0.03674746,
0.024663158,
-0.060253005,
0.009173809,
-0.004111937,
-0.063402615,
-0.01951628,
-0.039490156,
0.018258424,
-0.043015976,
-0.015164487,
-0.017073318,
0.027809769,
-0.021215433,
0.007797112,
0.008731678,
-0.036673818,
0.012786695,
0.028968208,
-0.030241087,
0.020865943,
-0.026749771,
0.033981804,
0.010454427,
0.023153242,
0.020885227,
-0.0056243115,
0.0117305005,
-0.015051302,
0.013582618,
0.03807434,
0.010856497,
0.020801183,
0.011158894,
0.036391996,
0.019670399,
0.007724792,
0.06660602,
-0.011434749,
0.0057949307,
-0.015963648,
-0.019779123,
0.005820883,
0.02833991,
0.055220414,
0.010273399,
-0.016092837,
0.03503124,
-0.034432467,
0.023686841,
0.022379564,
-0.07128316,
0.012263694,
-0.015228141,
0.0032988787,
-0.029930541,
0.041881878,
0.03506383,
0.020228907,
0.0438159,
-0.038998622,
0.0033828963,
-0.082220346,
-0.021915225,
-0.00014996591,
0.02804432,
0.020062406,
0.012756022,
0.034497134,
-0.02747778,
-0.047376838,
0.064383976,
0.070425786,
-0.05746651,
-0.028404344,
0.026372714,
0.03306257,
0.0073155067,
0.051485326,
0.0068675145,
0.040136788,
0.045383066,
0.034149066,
0.02086147,
0.0009087964,
0.037278313,
-0.081617154,
-0.032882202,
0.02157909,
0.021868218,
0.07965252,
-0.0027324036,
-0.0022803254,
0.014258049,
-0.0020600832,
0.00047349077,
0.04002713,
0.04263055,
-0.009511693,
0.06796055,
-0.02155429,
0.043834608,
-0.029989557,
0.009623121,
-0.026068889,
0.021337777,
0.011070724,
-0.020380916,
-0.0023191955,
0.046481982,
0.039304417,
-0.0045394786,
0.003737432,
0.034863517,
0.053514365,
0.035962798,
0.04095995,
-0.080873586,
0.0112584885,
-0.0145209655,
0.023800805,
0.04855744,
0.0037306463,
0.03949077,
0.042007603,
0.00916003,
-0.012223143,
0.022103913,
-0.017077385,
0.035043065,
0.0052557834,
-0.039841656,
0.0020140728,
-0.057917137,
0.03641347,
0.017727314,
-0.030229636,
0.026509946,
0.010324972,
-0.028184937,
0.017539727,
-0.021746434,
0.0031611102,
-0.008564719,
0.026577024,
-0.0073260553,
0.012139988,
-0.039608642,
-0.062452354,
0.03773313,
0.002820345,
0.017331626,
-0.008981819,
-0.02020533,
-0.057272766,
-0.014693149,
0.033687364,
0.038407385,
-0.020838683,
0.038617346,
-0.03282725,
0.0065172473,
0.031010486,
-0.0017651296,
-0.02163586,
-0.008899588,
-0.026506478,
0.03540833,
0.07076032,
-0.016357146,
-0.08069671,
-0.042310607,
-0.012363274,
0.03790111,
0.007565661,
-0.037524316,
0.07095513,
0.010869782,
0.0032129285,
-0.033399966,
0.038155936,
0.034415327,
-0.052643284,
-0.05567196,
-0.03225739,
0.008719539,
0.14483878,
0.071855366,
-0.026637336,
-0.04281552,
-0.02133026,
0.020932574,
0.023442162,
0.0018492922,
0.03244938,
0.08237317,
-0.03321164,
0.051374298,
-0.018296566,
-0.009659297,
0.031976808,
-0.010097727,
0.010057915,
0.051651575,
0.0199425,
0.019540219,
-0.020617861,
0.03563907,
-0.036343392,
0.032987807,
0.06027452,
-0.017668264,
-0.044425867,
0.015104213,
-0.07373515,
0.01810383,
0.031706426,
-0.046879865,
0.0036537861,
-0.031956047,
0.03578955,
0.027828328,
0.021754785,
-0.062319316,
-0.035861533,
0.023409521,
-0.011718964,
0.012511818,
0.019975103,
0.03046746,
0.019306395,
0.008897869,
0.022976985,
0.08666871,
0.034413245,
0.007698169,
-0.013328631,
0.026807705,
-0.039164156,
0.0001842902,
0.008939378,
0.053093646,
0.0054843645,
-0.0048546907,
0.006646481,
-0.036913976,
-0.02434218,
-0.007819763,
-0.034326635,
-0.09425071,
-0.035864092,
-0.027039077,
0.0018631782,
-0.011367168,
0.03460308,
0.06908907,
0.0006993122,
0.029187243,
0.013981396,
-0.034905925,
0.009661519,
0.016402403,
0.013219478,
0.025419146,
-0.013838593,
-0.09521828,
0.04690183,
0.008306249,
-0.04494361,
0.07675296,
0.08630913,
0.0027291386,
0.047438163,
-0.03291628,
-0.017013406,
0.008466675,
0.0068329596,
-0.047961134,
-0.0060370415,
0.017779041,
0.05304337,
-0.07138653,
-0.013791788,
0.01667366,
-0.026808698,
0.012813507,
-0.029537767,
-0.07048566,
0.026801381,
-0.021863695,
-0.08986038,
0.04256004,
-0.042580713,
-0.050321113,
-0.02441381,
0.024967946,
-0.03307329,
0.023765154,
-0.042465124,
-0.022590572,
-0.050977908,
0.02002681,
-0.01659008,
-0.0016500946,
0.007923218,
0.023085529,
-0.028293792,
-0.0070867077,
-0.002519201,
0.014844528,
0.012927241,
0.013701682,
-0.048480112,
0.017051037,
-0.048594326,
-0.03374255,
0.015788445,
0.01736624,
0.02363127,
-0.043622795,
-0.04752542,
0.05619384,
-0.009064419,
0.013587886,
0.031963795,
0.0055674682,
0.00821165,
-0.007879534,
-0.025519967,
0.030929072,
-0.03054716,
-0.028717758,
-0.01304714,
0.025171572,
-0.004879199,
-0.001190343,
-0.010213315,
0.01971419,
-0.032143768,
-0.008055433,
-0.045028396,
0.0050284,
0.008977255,
0.007132238,
-0.052949388,
0.011562612,
-0.0043699676,
0.06377099,
-0.010715953,
-0.027962748,
0.0025381946,
0.065418504,
0.015951851,
-0.10228855,
-0.0038436814,
-0.015558708,
-0.035604823,
0.039515387,
-0.011977611,
0.008272532,
-0.047362626,
0.029810345,
-0.026100902,
0.080183394,
-0.029716058,
-0.008065036,
-0.0019149253,
-0.029152166,
0.030865246,
0.028290713,
0.059991617,
-0.0539013,
0.037941493,
-0.046701435,
-0.056897625,
-0.050652288,
0.0022519496,
-0.044697277,
0.018704673,
0.024128519,
0.06013336,
0.057803143,
-0.011098817,
0.004350433,
-0.046533823,
0.011547173,
0.039410993,
0.010503389,
0.058373533,
0.04097013,
-0.04243095,
0.09540366,
0.07546867,
0.057422172,
-0.0150666535,
-0.00072658417,
-0.0055776117,
0.03369649,
-0.07023698,
-0.041115183,
-0.06924242,
0.0061645363,
-0.00047588223,
-0.03563763,
0.011595489,
-0.0034681638,
0.02738642,
0.026109103,
-0.018220779,
0.026244855,
-0.067560904,
0.026338186,
0.016787479,
0.065061815,
-0.0032663948,
-0.040305886,
0.017459001,
0.036517326,
0.055479337,
0.00085552345,
-0.0372879,
-0.06509678,
-0.038734257,
0.052903496,
0.033298932,
0.039541215,
-0.09552075,
-0.0096350545,
-0.08214571,
-0.024635889,
0.012038027,
0.00089192577,
-0.03183621,
-0.011991485,
-0.03902091,
-0.0127780195,
-0.01724641,
0.051544886,
-0.0018517342,
-0.023545155,
0.046582974,
0.00838307,
0.030676562,
0.00019708494,
0.045098882,
-0.031479437,
-0.013706887,
0.021959703,
0.0020392945,
-0.06168245,
-0.03649696,
0.035295885,
0.02590806,
-0.010051864,
0.06865142,
-0.017345844,
0.01564999,
-0.00623685,
0.010844825,
-0.013015856,
0.022496467,
0.07649363,
0.036356304,
-0.040345356,
0.00293154,
-0.01804687,
-0.03515604,
0.022299029,
-0.03676945,
0.07276787,
-0.04430659,
-0.03392204,
-0.030020125,
-0.022968723,
0.029162299,
-0.0033855392,
0.021752143,
0.017534897,
-0.023780832,
0.027371254,
0.017058812,
-0.0004049258,
0.03990323,
-0.008081489,
-0.013143231,
-0.06439464,
0.018572995,
-0.046607014,
0.027462576,
0.014255841,
-0.02674485,
0.023134982,
-0.070987545,
0.00939401,
0.023703443,
-0.009809178,
0.022829901,
-0.040908735,
0.0064307996,
0.11391804,
-0.051118158,
0.020216303,
-0.02172353,
0.04750726,
0.018758802,
-0.0051700706,
-0.02455834,
0.005184222,
-0.036763046
],
"index": 0,
"object": "embedding"
}
],
"model": "nomic-embed-text:latest",
"object": "list",
"usage": {
"prompt_tokens": 8,
"total_tokens": 8
}
}
},
"is_streaming": false
}
}

@ -0,0 +1,423 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is the content of test file 2"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.014871168,
0.094365,
-0.098275684,
0.016189486,
0.072296426,
-0.039229725,
0.007638039,
0.035811495,
-0.03784589,
0.022591105,
0.15810202,
0.009195058,
-0.029846655,
-0.06448414,
-0.01898075,
-0.02023675,
-0.07593923,
-0.04666322,
0.010769107,
0.033283222,
0.06951838,
0.039086174,
-0.009640043,
-0.008601025,
0.039979056,
0.02799972,
-0.06578151,
0.08029443,
0.0101568075,
-0.07898879,
0.048795786,
0.057297125,
0.025737243,
0.03572965,
0.11485981,
0.030900626,
0.118485495,
-0.041167885,
-0.019413618,
-0.0009897926,
0.03717747,
-0.012367201,
-0.0026639055,
0.015703445,
-0.0046827365,
0.023138778,
0.012855939,
-0.029367425,
0.00042996072,
-0.003222942,
-0.055509202,
0.012830617,
-0.06941755,
-0.011024706,
0.07149942,
0.021040803,
0.0409756,
0.010087916,
-0.015326204,
0.06633094,
0.024846299,
0.030543685,
-0.036063526,
0.04786587,
0.08074621,
-0.051489003,
-0.03944393,
-0.025607359,
-0.030061793,
-0.119378455,
-0.14597124,
-0.0019379344,
0.008393092,
0.023913048,
0.028285578,
0.017838098,
-0.10575887,
0.008080291,
0.06388723,
-0.12506105,
-0.02536782,
-0.11007926,
0.051198784,
0.007446184,
-0.030837545,
0.09254253,
0.05638562,
-0.0155668175,
-0.031867314,
0.018337138,
0.02442871,
-0.042078987,
0.0038125275,
0.089955,
-0.008119613,
0.040103614,
0.011012824,
0.044628628,
0.0791957,
0.054247666,
-0.027651828,
-0.03190785,
0.041443683,
0.041629724,
-0.077835254,
-0.09937542,
0.029904107,
-0.05434366,
0.07058962,
-0.04535761,
0.03365359,
-0.061656676,
-0.018105442,
-0.07228336,
0.035377987,
-0.03161877,
-0.020589713,
0.058485094,
-0.049225487,
0.03934316,
0.08550028,
-0.029991213,
-0.05576064,
-0.029334918,
-0.053031918,
-0.061839186,
0.08176057,
-3.3282106e-33,
0.00018265574,
-0.09808404,
-0.00554673,
0.13180184,
0.026467713,
-0.03976283,
0.010410568,
0.022475285,
-0.07190717,
0.005138454,
-0.021325583,
-0.1046733,
0.0020021838,
0.023773609,
-0.057499945,
-0.011727483,
-0.020912478,
0.026353713,
0.01779019,
-0.0148312645,
0.064687304,
0.045060385,
-0.029312065,
-0.08633001,
-0.026792597,
0.014552106,
0.004505434,
-0.06774755,
0.034052122,
0.013713737,
-0.0075813113,
-0.059718475,
-0.016189422,
0.044314116,
0.026844766,
0.026430624,
0.024091395,
-0.0032406747,
-0.075288124,
0.032822173,
0.027104331,
-0.026295068,
0.04316082,
-0.010091815,
0.034184698,
-0.08266358,
-0.020962045,
-0.00719584,
0.068549044,
0.005033586,
0.0017975906,
0.06465498,
0.05990613,
-0.012483792,
0.024451919,
0.021659598,
-0.0046074707,
-0.004559902,
0.002713282,
0.062373567,
0.0035651235,
0.06017224,
-0.062707886,
0.039937016,
-0.0064443815,
-0.041358124,
-0.045459975,
-0.1090475,
0.08058783,
0.055110224,
-0.05126053,
-0.05976516,
0.037940193,
0.015456569,
-0.024956519,
-0.037877902,
-0.006799,
0.031685203,
-0.036858797,
-0.055584695,
-0.048513155,
-0.07101657,
-0.041681714,
-0.04429727,
-0.09584418,
-0.060873836,
0.008867621,
-0.106438614,
0.040050562,
-0.084729105,
0.018111277,
0.010153493,
-0.08883196,
-0.063969284,
0.08611972,
1.4074298e-33,
0.03433739,
0.037653737,
-0.05348675,
0.0015385789,
0.026684077,
0.026603375,
0.07006387,
-0.034265522,
-0.018221779,
0.10960259,
0.013464475,
-0.008325532,
0.019438146,
-0.039553005,
0.03469477,
-0.0123773115,
-0.013288484,
-0.048081715,
-0.019539693,
-0.0033996427,
-0.024453517,
0.061505664,
0.119236834,
0.026294904,
-0.01607055,
-0.011499089,
0.04267117,
0.0295908,
0.022084564,
0.007893738,
0.052055445,
0.05781507,
-0.13408813,
0.01778491,
0.021400984,
-0.12113228,
0.10535695,
-0.07358604,
-0.013651957,
0.04049295,
0.054150987,
0.0987462,
0.0110208625,
0.040327504,
0.034936633,
0.10400846,
0.12958324,
-0.024531014,
0.002284699,
-0.044239815,
0.049778443,
-0.055788964,
0.015235888,
0.0034493478,
-0.02607555,
0.060282644,
-0.028004775,
0.040875163,
-0.023749253,
0.002289086,
0.04982698,
0.046928305,
-0.064160004,
0.013701618,
0.015511878,
-0.054725982,
-0.0459802,
0.03258067,
0.027034523,
0.01643672,
-0.041782584,
-0.03698569,
-0.023043923,
-0.07073365,
0.028486207,
0.0017764921,
-0.03352676,
-0.009977863,
0.024488676,
-0.01789395,
0.029737154,
-0.026266927,
-0.03567072,
0.07469971,
0.028393274,
-0.029625034,
-0.01053128,
0.09147493,
-0.018718474,
0.0012933073,
-0.021214467,
0.07475739,
-0.007773536,
0.048597455,
0.005216022,
-1.6914717e-08,
-0.05724563,
-0.0938908,
-0.034359876,
-0.037500683,
-0.020235153,
0.06142227,
-0.042273093,
-0.008759724,
-0.009908796,
0.016232042,
-0.014239323,
0.024709346,
-0.030538557,
-0.05391127,
-0.051778477,
0.01277344,
0.0036140021,
-0.012569925,
-0.025041323,
-0.0203936,
0.025865255,
0.010908398,
0.027834684,
0.009661084,
-0.006598172,
0.07860872,
0.054516125,
0.042956624,
-0.06275145,
-0.025701547,
0.08085865,
0.030041302,
0.02248997,
-0.0840195,
0.00029938898,
0.10966559,
0.118907265,
0.063014604,
0.037847042,
0.032069027,
-0.05345487,
-0.022730324,
0.0071888734,
0.037573762,
-0.020178014,
-0.090167634,
-0.07191704,
-0.02604166,
-0.043885063,
-0.14087014,
-0.017230472,
-0.012063355,
-0.046736836,
0.039048597,
-0.060394738,
0.022166032,
0.025670663,
0.022949725,
-0.06707243,
-0.014654702,
0.057985142,
0.10511708,
0.05698323,
-0.017205814
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 8,
"total_tokens": 8
}
}
},
"is_streaming": false
}
}

@ -0,0 +1,423 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is batch test file 0"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.020637129,
0.048547756,
-0.12516363,
0.01991118,
-0.006535745,
-0.017178575,
0.027727997,
0.032170568,
-0.07302972,
0.008939002,
0.11493648,
0.0058907545,
0.0058539375,
-0.077171296,
-0.06883132,
0.0039748913,
-0.046849657,
-0.072902456,
-0.010890429,
-0.0019311906,
-0.011614798,
0.003689495,
-0.03695609,
-0.009029024,
0.017461002,
-0.004713484,
-0.010254731,
-0.026636763,
-0.026125714,
-0.046913657,
0.017024228,
0.0713477,
0.07881179,
0.03789051,
0.21716279,
-0.0077837943,
0.04686894,
0.020414647,
7.314368e-05,
0.0103133675,
0.059848394,
-0.04321678,
-0.011937493,
-0.021149047,
0.021315353,
-0.00072822213,
-0.046116166,
-0.0046820445,
0.016943695,
-0.03249135,
-0.055184096,
4.1543382e-05,
-0.034172166,
-0.023247559,
0.020267941,
0.012827845,
0.065036125,
0.07180022,
-0.013490698,
0.06376413,
-0.017730094,
-0.01806601,
-0.034191083,
0.008955718,
0.098446764,
-0.0061265854,
-0.06815829,
-0.039525956,
0.060588058,
-0.094874755,
-0.11774928,
0.019538416,
-0.014697532,
0.04773719,
-0.061298393,
0.030337377,
-0.0022184649,
0.019007793,
0.024370821,
-0.07063359,
-0.07582954,
-0.10816809,
0.031845964,
-0.057830192,
-0.04169559,
0.0752806,
0.019289386,
0.028845867,
0.0077010663,
0.013930818,
-0.067987345,
0.012679873,
-0.07907268,
0.0143718915,
-0.021433424,
0.11880779,
-0.016258432,
0.07099568,
0.035778854,
0.028776454,
0.013304291,
-0.05192297,
0.026758345,
0.10282426,
-0.003306269,
-0.03239622,
0.083044045,
-0.0412691,
0.043435257,
-0.043423533,
-0.013239603,
-0.0029038454,
0.038365215,
-0.10401672,
0.012744224,
-0.122984126,
-0.008942817,
0.06162198,
-0.120285526,
0.043005254,
0.04814879,
-0.036352232,
-0.003885529,
-0.018503373,
-0.088186465,
-0.0031517749,
0.09290919,
-1.1695094e-33,
-0.015589721,
-0.13189551,
0.008088751,
0.06899503,
0.07353927,
-0.030646399,
0.05110342,
0.03081624,
-0.07850498,
-0.021147482,
0.00017823944,
-0.10502706,
0.030078856,
0.02572523,
-0.068158925,
-0.025015576,
0.021830637,
0.049748335,
0.01520941,
-0.080153145,
0.06796621,
0.021865685,
-0.034017574,
-0.030821111,
-0.048006665,
0.0005615041,
-0.0137883695,
-0.04500587,
0.015368256,
-0.0043663937,
0.037706476,
0.0049090013,
-0.06216566,
0.03060772,
0.030548712,
0.029262561,
0.020701125,
0.0056516766,
0.010610447,
0.019530762,
-0.05664136,
-0.022654066,
-0.0010107337,
-0.020805702,
-0.012242364,
-0.05591731,
-0.049421698,
0.024721064,
0.05803342,
0.010474127,
-0.008790625,
0.025362873,
0.020258408,
0.004368581,
-0.01018003,
0.012385932,
-0.037656736,
-0.05642639,
0.020923307,
0.022813153,
-0.005735433,
0.015326356,
-0.108707875,
0.048076265,
0.023256551,
-0.10311626,
0.061980195,
-0.07340407,
0.051583096,
0.07360003,
-0.029443117,
-0.014564469,
0.042043358,
-0.020252181,
0.0147808045,
-0.0285806,
0.07891856,
0.056849223,
-0.106308356,
0.0197874,
0.0269322,
-0.04749746,
-0.066681586,
-0.10474516,
0.012599429,
-0.056163482,
-0.04901015,
-0.04571026,
0.09704481,
-0.105899766,
0.044303197,
-0.020125533,
-0.0368709,
-0.015417924,
0.042297333,
-8.289866e-35,
0.07415767,
0.10998298,
-0.016995763,
0.01066263,
-0.0012327223,
0.028000232,
0.0714317,
-0.02320065,
-0.07778205,
0.11864239,
-0.016559754,
0.037961867,
0.02930022,
-0.008237686,
0.059777655,
0.008086454,
0.02075205,
0.025284613,
-0.055471037,
0.0073576584,
-0.013398135,
0.11896543,
-0.014611002,
0.07691816,
-0.019711656,
-0.01920917,
-0.004744884,
0.08173054,
0.019665759,
-0.013193461,
0.06215852,
0.07420406,
-0.073212065,
0.036052067,
0.07328616,
-0.057373393,
0.08346425,
0.018834447,
0.03309735,
0.041197047,
0.033917964,
0.09151449,
-0.051731598,
0.049615093,
0.01124018,
0.06661862,
0.07268375,
-0.013245848,
-0.039673895,
-0.012173254,
0.0017787582,
-0.05746287,
-0.013884767,
0.020205025,
-0.029692367,
-0.031010685,
0.0149556715,
0.026381323,
-0.025382591,
0.0074336748,
-0.00949915,
0.015655186,
-0.0012397208,
-0.0032508406,
-0.046632554,
-0.0030316226,
-0.007273208,
0.064231135,
-0.034431897,
-0.06433184,
0.045421343,
-0.010773523,
-0.017881984,
0.010312532,
-0.024369273,
-0.008478495,
-0.02457377,
-0.0263535,
-0.027263613,
0.047060315,
0.08128726,
0.0045517692,
-0.010821656,
0.026526682,
0.018961033,
0.059243083,
0.001561823,
0.09838158,
0.00822081,
0.008796511,
-0.0060577285,
0.028892087,
0.08253284,
0.049560018,
0.023363132,
-1.498271e-08,
-0.036891207,
-0.10629833,
0.030452948,
0.049268734,
-0.0030453752,
0.07413954,
-0.07043819,
-0.034285706,
-0.009679971,
-0.046219327,
0.013510038,
-0.018686565,
-0.048570327,
0.0028313443,
-0.06190722,
-0.053201936,
0.0060967463,
-0.043467365,
0.042226154,
0.03455835,
-0.0375257,
0.023590367,
0.054896712,
0.029878648,
0.019286606,
0.026097741,
0.06938145,
0.06272366,
-0.09566521,
-0.07481147,
0.025204772,
0.039396077,
0.036375154,
-0.01104443,
-0.028223084,
0.111878626,
0.13400707,
0.06680113,
-0.011737675,
-0.03585406,
-0.07978788,
0.032793757,
-0.0021075818,
-0.028365146,
-0.042218164,
-0.08132239,
-0.0753423,
-0.043771427,
-0.015633285,
-0.14193884,
-0.055949364,
0.025526602,
-0.023186589,
0.061106257,
-0.056208834,
0.00838827,
0.014720396,
-0.014650135,
-0.012830787,
0.08434067,
0.024660436,
0.05366935,
0.005782819,
-0.10599063
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

@ -0,0 +1,807 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "nomic-embed-text:latest",
"input": [
"This is batch test file 0"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "nomic-embed-text:latest"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.04614301,
-0.020081447,
-0.13696706,
0.014573554,
0.0701535,
-0.023059264,
0.0017123591,
0.046829354,
-0.04367561,
-0.114361376,
-0.035698596,
0.06475817,
0.05364872,
0.038444433,
-0.01979135,
-0.037200496,
0.036657624,
-0.06543346,
0.019384816,
0.014107363,
0.08575862,
0.005253997,
-0.068923116,
0.010090121,
0.12664902,
0.007504762,
-0.01953818,
0.050352264,
-0.054028552,
-0.032810874,
0.06410688,
0.02482149,
0.013947014,
-0.018964177,
-0.014869268,
-0.014962293,
0.015406188,
0.0019375562,
0.045115244,
0.01663003,
0.011144363,
-0.0072048977,
0.03155388,
-0.044834215,
-0.0060860706,
0.04020486,
0.018086052,
0.04788624,
-0.00983748,
0.013905991,
-0.015586391,
-0.04333209,
-0.025767654,
0.0115056895,
0.023292946,
7.8543904e-05,
-0.025338432,
0.027270807,
-0.033308506,
-0.0034880606,
0.027414253,
0.023092583,
-0.046051297,
0.05614708,
0.0013318001,
-0.009060849,
-0.025585877,
0.01975632,
0.005582998,
0.029287277,
0.04566754,
-0.025097856,
-0.031588476,
0.010089205,
-0.032345522,
-0.05282097,
-0.027767532,
-0.064588815,
-0.048720118,
-0.019109437,
0.018205147,
0.023525203,
0.030500842,
0.062187836,
0.056703616,
-0.012734468,
-0.0023994881,
-0.01470007,
-0.014610555,
0.041148573,
0.02209264,
0.016696744,
0.01664216,
-0.042584907,
-0.030513879,
0.009484068,
0.038292237,
0.049507294,
-0.008864681,
-0.026641846,
-0.00806868,
0.015242631,
0.03148721,
-0.029101137,
-0.001128117,
0.047483873,
-0.021579307,
-0.0061297114,
-0.051103026,
-0.01100252,
-0.007417349,
0.033126004,
-0.03208908,
-0.014004128,
0.0055860616,
-0.004471519,
0.040237978,
-0.011666332,
-0.03375841,
0.010431967,
0.015548171,
-0.003738259,
0.006507693,
0.044239193,
0.022051405,
0.0369485,
-0.08473572,
0.050257385,
0.021561263,
-0.038263254,
-0.0028757958,
0.004459847,
-0.0499833,
-0.05051039,
0.038672045,
0.027102912,
-0.038302545,
-0.04273586,
-0.008564898,
0.010148107,
-0.033453222,
0.025933161,
0.034907244,
0.05704188,
0.047914036,
-0.033055704,
0.037968747,
0.021832222,
-0.021085719,
0.020705225,
0.0013380332,
0.0033825892,
-0.004659198,
0.03569596,
0.035501115,
-0.07247981,
0.024580602,
-0.016031673,
0.0043628234,
0.044260535,
0.022414433,
-0.024638122,
-0.027389847,
-0.015699405,
-0.0736989,
-0.07402259,
0.021693923,
0.08675446,
0.07019457,
-0.010507776,
-0.053982176,
-0.050338153,
0.022691121,
-0.009254433,
-0.08471297,
-0.02192142,
0.01370606,
-0.007591457,
0.04464477,
-0.041420456,
0.014799598,
-0.017481469,
0.025636235,
0.021010395,
-0.007866782,
-0.044533994,
0.02992151,
-0.01817989,
-0.046332225,
-0.038017664,
-0.010766631,
-0.053923517,
-0.06885444,
-0.083982274,
0.0044967085,
-0.059554394,
-0.012864852,
-0.006990424,
0.04381485,
-0.019732013,
0.0047561186,
0.012573004,
-0.084608465,
0.044956904,
-0.043024026,
-0.008965278,
-0.018597735,
0.023019703,
-0.058244467,
0.03946037,
0.0070161144,
-0.0072559468,
-0.038439214,
0.007182057,
0.04479635,
-0.04825045,
0.020951761,
-0.04177098,
-0.015445904,
0.0024602767,
0.019107025,
0.01879466,
0.015647886,
-0.03868733,
0.0010552967,
-0.031725515,
0.003677792,
-0.008869332,
0.016350579,
-0.025660282,
-0.0033997998,
-0.053593792,
0.01300085,
0.014504953,
-0.04167999,
-0.013626902,
-0.013473784,
0.04477186,
-0.039079364,
0.045125194,
-0.038965665,
0.041032016,
-0.021128332,
-0.00079906755,
0.0105881365,
-0.023438545,
-0.009942863,
-0.028716002,
0.031107599,
0.017214399,
-0.027654208,
0.013554825,
0.019173222,
0.055249233,
0.00617875,
-0.01951432,
-0.008078177,
-0.045365352,
-0.013410786,
-0.06576592,
0.0258962,
-0.04870149,
0.028375717,
0.02127929,
0.0074190334,
-0.015849123,
-0.050413407,
0.027654368,
-0.01447592,
-0.0046318294,
0.003353468,
0.02456171,
-0.006699941,
-0.04072025,
0.030406132,
0.003700867,
0.04991202,
0.043061696,
-0.0014100377,
0.038879305,
-0.02551224,
-0.03253989,
0.002335025,
0.0066566374,
0.013019894,
-0.017884579,
0.03333752,
-0.005329557,
0.025703372,
0.01243421,
0.0015536154,
-0.0011326541,
-0.02956871,
0.010284604,
-0.0017640645,
0.030444842,
0.024831444,
-0.015894072,
-0.017051669,
-0.012481152,
-0.021874228,
0.032245617,
0.029441461,
-0.019289171,
0.015042458,
-0.048809912,
0.045543794,
-0.025887825,
-0.0017429133,
0.050035972,
-0.094813764,
-0.026645338,
-0.03496652,
0.02787559,
-0.009335962,
0.050203443,
0.007864018,
0.008651598,
0.07439614,
-0.04608253,
0.008741113,
-0.096183665,
0.01909248,
0.02903942,
-0.020657493,
0.03056416,
0.025593685,
0.05326756,
-0.035201855,
-0.0042431992,
0.047313657,
0.04643017,
-0.015038583,
-0.03623203,
0.06975197,
0.02893981,
-0.012065428,
0.03489605,
-0.02045082,
0.08106463,
0.03046569,
0.05845714,
0.038226783,
-0.0039640213,
0.020310445,
-0.044298742,
-0.011063444,
0.031646963,
-0.016750742,
0.06093846,
-0.0024345908,
0.0137670245,
0.01068818,
0.00028172386,
0.024276268,
0.007246687,
0.017009424,
-0.0058112424,
0.055742696,
0.0020487534,
0.0041393945,
-0.002708682,
-0.0111793615,
-0.016895374,
0.009005465,
0.025580926,
-0.015197682,
0.0152440565,
0.049733438,
0.00909726,
-0.04997614,
-0.054340348,
0.047531743,
0.052675292,
0.0002477018,
0.017530492,
-0.04548658,
0.0034042338,
-0.027109472,
0.0540901,
0.05400029,
-0.039156314,
-0.010473526,
0.036758192,
0.012307892,
-0.043290082,
0.021435479,
-0.013614977,
0.047010962,
0.061001405,
0.0067372657,
0.0227589,
-0.0519168,
0.012738339,
-0.027280986,
0.0012095303,
0.053970173,
0.011808772,
-0.06391073,
0.049324006,
-0.016165268,
-0.035052363,
0.011938826,
0.033804722,
-0.033935532,
0.014987266,
-0.03362387,
-0.022560425,
0.05126289,
-0.008983691,
0.05116898,
-0.053964064,
-0.038813572,
-0.06834585,
0.0425859,
0.029469976,
0.06586096,
0.056827266,
-0.028529037,
-0.022799347,
0.03930842,
0.009058165,
0.0029452725,
-0.046222363,
-0.015354657,
0.020766245,
0.00544761,
0.054154783,
0.024518205,
-0.0762551,
-0.03815425,
0.030558256,
-0.014623021,
0.04429291,
-0.02593325,
0.06950066,
-0.011652937,
0.00784224,
0.010082946,
0.02640965,
0.044778366,
-0.017441178,
-0.042124864,
0.030845765,
-0.047991402,
0.1127873,
0.11150797,
-0.0745599,
-0.059560712,
-0.00808373,
-0.008904883,
0.047381986,
-0.03259649,
-0.0034343451,
0.043409795,
-0.011778097,
0.017888952,
-0.042976636,
-0.014014427,
0.013991117,
0.008008242,
-0.005016844,
0.053890087,
0.056538153,
0.016641492,
-0.011209175,
0.005071369,
-0.031119458,
-0.012060056,
0.047321502,
-0.01410517,
-0.06337502,
0.057011377,
-0.046111424,
-0.022285707,
0.00068395643,
-0.01453697,
0.0030104562,
0.031148981,
0.029581407,
0.007647941,
0.011242783,
-0.026178291,
-0.05194385,
0.037139274,
0.026292743,
0.01298006,
0.023150109,
0.06221823,
-0.024437338,
0.056873403,
0.027463028,
0.07723492,
0.0019251422,
0.042778768,
-0.026794884,
-0.016140813,
-0.037990715,
0.0015520528,
0.01605836,
-0.012476547,
-0.01679565,
0.027481532,
0.018949807,
0.010083091,
-0.01057625,
-0.024935285,
-0.031943906,
-0.051917356,
-0.04344679,
-0.04837223,
-0.009939983,
0.040695325,
0.024695948,
0.063317895,
-0.0018597379,
0.016552558,
-0.047521863,
-0.07224005,
0.042071674,
0.016915316,
0.014148548,
0.01878253,
-0.026108567,
-0.06437781,
0.021399872,
0.011175348,
0.0033761705,
-0.004680718,
0.03344319,
0.0031177911,
0.053175025,
0.028025331,
-0.0069551654,
-0.034634676,
-0.012221638,
-0.035786934,
0.04296345,
-0.01631924,
0.060271725,
-0.04230959,
-0.0064216405,
0.0013953961,
-0.041444454,
-0.008569435,
-0.01984154,
-0.061582044,
0.049848285,
-0.010022811,
-0.07785035,
-0.006366211,
-0.012778517,
-0.037107654,
-0.034078293,
-0.0019027964,
-0.018393178,
-0.031273652,
-0.030624373,
-0.047289733,
-0.055507194,
0.0149980355,
0.009802669,
0.05346352,
0.011616594,
0.040882636,
-0.05801636,
-0.018325027,
0.033699974,
-0.015700053,
-0.018874831,
0.00975098,
-0.028787887,
-0.010430304,
-0.019937277,
-0.025684841,
-0.017275153,
0.048182886,
0.040767677,
-0.006017042,
-0.012711738,
-0.0010345151,
0.015744662,
0.023162043,
0.02130765,
-0.0024493549,
0.015457228,
0.037933253,
-0.031316977,
0.06891338,
0.005748761,
-0.07730445,
-0.032125294,
0.036361482,
0.0061598606,
0.018043444,
0.038325332,
-0.036203355,
-0.0123121375,
-0.022851182,
-0.035532467,
-0.041686766,
0.03709366,
-0.0017735043,
-0.018472947,
-0.045957465,
-0.023627242,
0.01808581,
0.015027068,
-0.042559687,
-0.009885546,
0.057179235,
-0.03215653,
-0.048862357,
-0.012386838,
-0.021847295,
-0.044682942,
0.040646516,
0.00038476288,
0.005513208,
-0.03062349,
0.011521192,
-0.035988722,
0.061369143,
-0.020910813,
0.075483516,
-0.045259465,
-0.02859422,
0.015579937,
0.0075254533,
0.038143836,
-0.045940828,
0.027484732,
-0.091758996,
-0.048610084,
-0.095563754,
0.0004537795,
-0.05040322,
0.02240349,
0.046084013,
0.04480506,
0.037050348,
1.0597447e-05,
-0.018571958,
0.009857055,
0.021747472,
0.031625595,
-0.03629067,
0.037058298,
0.041504655,
-0.03894645,
0.046530657,
0.08956203,
0.05101704,
0.005822723,
-0.014409921,
0.0050498573,
0.039041325,
-0.010459366,
-0.022216242,
-0.07559245,
0.019515479,
-0.010434134,
-0.040965218,
0.006768683,
0.021648958,
0.059341215,
0.0044922573,
-0.011139294,
0.023696495,
-0.04251101,
0.028621383,
0.005927879,
0.05084491,
-0.01525845,
0.03151167,
0.008018476,
0.05309983,
0.059823282,
-0.02189311,
0.010798892,
0.0027545195,
-0.024435053,
0.042531513,
0.028011957,
0.0147431465,
-0.062116392,
0.032930456,
-0.03597175,
0.002567075,
-0.030825771,
-0.0070259375,
0.007989939,
-0.027159046,
-0.0714439,
-0.020082822,
-0.018486606,
0.01108784,
-0.012602704,
-0.0012252157,
0.06443626,
0.036829114,
0.04501229,
0.0022744364,
0.058829524,
-0.008902569,
-0.010049271,
-0.0064951205,
-0.014354489,
-0.044668842,
-0.025392724,
0.015202658,
0.020321742,
-0.01176466,
0.09413702,
-0.0319812,
0.03219725,
-0.040439297,
-0.019967683,
0.0164714,
0.019272799,
0.02388655,
0.017886775,
-0.03603167,
-0.023737542,
-0.01898098,
-0.04790894,
-0.036694597,
-0.02994124,
0.034576166,
-0.05921917,
-0.022381892,
-0.051536635,
-0.05452498,
0.053339027,
0.019327087,
0.012448543,
-0.018923279,
-0.0019192714,
-0.01976354,
0.032581042,
-0.00695812,
0.033768184,
-0.028018538,
-0.023666212,
-0.017496848,
0.023191998,
-0.0502938,
0.01670451,
0.0058311033,
0.012473936,
0.023568941,
-0.06854558,
0.0073930174,
0.07903637,
-0.024922114,
-0.026363779,
-0.006970082,
-0.007723444,
0.074576765,
-0.032073244,
-0.013143484,
-0.010095435,
0.018318929,
0.008086707,
-0.01570327,
-0.046567768,
0.0038824868,
-0.027711825
],
"index": 0,
"object": "embedding"
}
],
"model": "nomic-embed-text:latest",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

@ -0,0 +1,34 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:l6-v2",
"created": 1759793684,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b-instruct-fp16",
"created": 1759791776,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
}
}

@ -0,0 +1,25 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:l6-v2",
"created": 1759785110,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
}
}

@ -0,0 +1,70 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "nomic-embed-text:latest",
"created": 1755204798,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama-guard3:8b",
"created": 1755125995,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:l6-v2",
"created": 1753804403,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b-instruct-fp16",
"created": 1752697170,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:latest",
"created": 1752691712,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama-guard3:1b",
"created": 1752267588,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
}
}

@ -0,0 +1,15 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [],
"is_streaming": false
}
}

@ -0,0 +1,25 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b-instruct-fp16",
"created": 1759791776,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
}
}

@ -0,0 +1,528 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-dev-fp8",
"created": 1729532889,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-max",
"created": 1750714611,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-pro",
"created": 1750488264,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b",
"created": 1748467427,
"object": "model",
"owned_by": "sentientfoundation-serverless",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new",
"created": 1739563474,
"object": "model",
"owned_by": "sentientfoundation",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-120b",
"created": 1754345600,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-thinking-2507",
"created": 1753916446,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
"created": 1753124424,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507",
"created": 1753455434,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3-0324",
"created": 1742827220,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct",
"created": 1752259096,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-20b",
"created": 1754345466,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct-0905",
"created": 1757018994,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3",
"created": 1735576668,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"created": 1733442103,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b",
"created": 1745885249,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5-air",
"created": 1754089426,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1",
"created": 1737397673,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-basic",
"created": 1742306746,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1",
"created": 1755758988,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-schnell-fp8",
"created": 1729535376,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-405b-instruct",
"created": 1721428386,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-scout-instruct-basic",
"created": 1743878279,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b",
"created": 1745878133,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-70b-instruct",
"created": 1721287357,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-0528",
"created": 1748456377,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/mixtral-8x22b-instruct",
"created": 1713375508,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 65536
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-embedding-8b",
"created": 1755707090,
"object": "model",
"owned_by": "fireworks",
"kind": "EMBEDDING_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 40960
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507",
"created": 1753808388,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"created": 1743878495,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2",
"created": 1743381121,
"object": "model",
"owned_by": "tvergho-87e44d",
"kind": "HF_PEFT_ADDON",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct",
"created": 1743392739,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false,
"context_length": 128000
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct",
"created": 1754063588,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1-terminus",
"created": 1758586241,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"created": 1721692808,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
"created": 1753211090,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5",
"created": 1753809636,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
}
],
"is_streaming": false
}
}

View file

@ -0,0 +1,527 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-dev-fp8",
"created": 1729532889,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-max",
"created": 1750714611,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-pro",
"created": 1750488264,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b",
"created": 1748467427,
"object": "model",
"owned_by": "sentientfoundation-serverless",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new",
"created": 1739563474,
"object": "model",
"owned_by": "sentientfoundation",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-120b",
"created": 1754345600,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-thinking-2507",
"created": 1753916446,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
"created": 1753124424,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507",
"created": 1753455434,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-embedding-8b",
"created": 1755707090,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 40960
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3-0324",
"created": 1742827220,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct",
"created": 1752259096,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-20b",
"created": 1754345466,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"created": 1743878495,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct-0905",
"created": 1757018994,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3",
"created": 1735576668,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"created": 1733442103,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b",
"created": 1745885249,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5-air",
"created": 1754089426,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1",
"created": 1737397673,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"created": 1721692808,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-basic",
"created": 1742306746,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1",
"created": 1755758988,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-schnell-fp8",
"created": 1729535376,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-405b-instruct",
"created": 1721428386,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-scout-instruct-basic",
"created": 1743878279,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b",
"created": 1745878133,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-70b-instruct",
"created": 1721287357,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-0528",
"created": 1748456377,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/mixtral-8x22b-instruct",
"created": 1713375508,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 65536
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507",
"created": 1753808388,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct",
"created": 1743392739,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false,
"context_length": 128000
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct",
"created": 1754063588,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2",
"created": 1743381121,
"object": "model",
"owned_by": "tvergho-87e44d",
"kind": "HF_PEFT_ADDON",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1-terminus",
"created": 1758586241,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
"created": 1753211090,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5",
"created": 1753809636,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
}
],
"is_streaming": false
}
}

View file

@ -0,0 +1,69 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "nomic-embed-text:latest",
"created": 1755204798,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama-guard3:8b",
"created": 1755125995,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:l6-v2",
"created": 1753804403,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b-instruct-fp16",
"created": 1752697170,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:latest",
"created": 1752691712,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama-guard3:1b",
"created": 1752267588,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
}
}

View file

@ -0,0 +1,423 @@
{
"test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_retrieve_contents[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is the content of test file 2"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.014871168,
0.094365,
-0.098275684,
0.016189486,
0.072296426,
-0.039229725,
0.007638039,
0.035811495,
-0.03784589,
0.022591105,
0.15810202,
0.009195058,
-0.029846655,
-0.06448414,
-0.01898075,
-0.02023675,
-0.07593923,
-0.04666322,
0.010769107,
0.033283222,
0.06951838,
0.039086174,
-0.009640043,
-0.008601025,
0.039979056,
0.02799972,
-0.06578151,
0.08029443,
0.0101568075,
-0.07898879,
0.048795786,
0.057297125,
0.025737243,
0.03572965,
0.11485981,
0.030900626,
0.118485495,
-0.041167885,
-0.019413618,
-0.0009897926,
0.03717747,
-0.012367201,
-0.0026639055,
0.015703445,
-0.0046827365,
0.023138778,
0.012855939,
-0.029367425,
0.00042996072,
-0.003222942,
-0.055509202,
0.012830617,
-0.06941755,
-0.011024706,
0.07149942,
0.021040803,
0.0409756,
0.010087916,
-0.015326204,
0.06633094,
0.024846299,
0.030543685,
-0.036063526,
0.04786587,
0.08074621,
-0.051489003,
-0.03944393,
-0.025607359,
-0.030061793,
-0.119378455,
-0.14597124,
-0.0019379344,
0.008393092,
0.023913048,
0.028285578,
0.017838098,
-0.10575887,
0.008080291,
0.06388723,
-0.12506105,
-0.02536782,
-0.11007926,
0.051198784,
0.007446184,
-0.030837545,
0.09254253,
0.05638562,
-0.0155668175,
-0.031867314,
0.018337138,
0.02442871,
-0.042078987,
0.0038125275,
0.089955,
-0.008119613,
0.040103614,
0.011012824,
0.044628628,
0.0791957,
0.054247666,
-0.027651828,
-0.03190785,
0.041443683,
0.041629724,
-0.077835254,
-0.09937542,
0.029904107,
-0.05434366,
0.07058962,
-0.04535761,
0.03365359,
-0.061656676,
-0.018105442,
-0.07228336,
0.035377987,
-0.03161877,
-0.020589713,
0.058485094,
-0.049225487,
0.03934316,
0.08550028,
-0.029991213,
-0.05576064,
-0.029334918,
-0.053031918,
-0.061839186,
0.08176057,
-3.3282106e-33,
0.00018265574,
-0.09808404,
-0.00554673,
0.13180184,
0.026467713,
-0.03976283,
0.010410568,
0.022475285,
-0.07190717,
0.005138454,
-0.021325583,
-0.1046733,
0.0020021838,
0.023773609,
-0.057499945,
-0.011727483,
-0.020912478,
0.026353713,
0.01779019,
-0.0148312645,
0.064687304,
0.045060385,
-0.029312065,
-0.08633001,
-0.026792597,
0.014552106,
0.004505434,
-0.06774755,
0.034052122,
0.013713737,
-0.0075813113,
-0.059718475,
-0.016189422,
0.044314116,
0.026844766,
0.026430624,
0.024091395,
-0.0032406747,
-0.075288124,
0.032822173,
0.027104331,
-0.026295068,
0.04316082,
-0.010091815,
0.034184698,
-0.08266358,
-0.020962045,
-0.00719584,
0.068549044,
0.005033586,
0.0017975906,
0.06465498,
0.05990613,
-0.012483792,
0.024451919,
0.021659598,
-0.0046074707,
-0.004559902,
0.002713282,
0.062373567,
0.0035651235,
0.06017224,
-0.062707886,
0.039937016,
-0.0064443815,
-0.041358124,
-0.045459975,
-0.1090475,
0.08058783,
0.055110224,
-0.05126053,
-0.05976516,
0.037940193,
0.015456569,
-0.024956519,
-0.037877902,
-0.006799,
0.031685203,
-0.036858797,
-0.055584695,
-0.048513155,
-0.07101657,
-0.041681714,
-0.04429727,
-0.09584418,
-0.060873836,
0.008867621,
-0.106438614,
0.040050562,
-0.084729105,
0.018111277,
0.010153493,
-0.08883196,
-0.063969284,
0.08611972,
1.4074298e-33,
0.03433739,
0.037653737,
-0.05348675,
0.0015385789,
0.026684077,
0.026603375,
0.07006387,
-0.034265522,
-0.018221779,
0.10960259,
0.013464475,
-0.008325532,
0.019438146,
-0.039553005,
0.03469477,
-0.0123773115,
-0.013288484,
-0.048081715,
-0.019539693,
-0.0033996427,
-0.024453517,
0.061505664,
0.119236834,
0.026294904,
-0.01607055,
-0.011499089,
0.04267117,
0.0295908,
0.022084564,
0.007893738,
0.052055445,
0.05781507,
-0.13408813,
0.01778491,
0.021400984,
-0.12113228,
0.10535695,
-0.07358604,
-0.013651957,
0.04049295,
0.054150987,
0.0987462,
0.0110208625,
0.040327504,
0.034936633,
0.10400846,
0.12958324,
-0.024531014,
0.002284699,
-0.044239815,
0.049778443,
-0.055788964,
0.015235888,
0.0034493478,
-0.02607555,
0.060282644,
-0.028004775,
0.040875163,
-0.023749253,
0.002289086,
0.04982698,
0.046928305,
-0.064160004,
0.013701618,
0.015511878,
-0.054725982,
-0.0459802,
0.03258067,
0.027034523,
0.01643672,
-0.041782584,
-0.03698569,
-0.023043923,
-0.07073365,
0.028486207,
0.0017764921,
-0.03352676,
-0.009977863,
0.024488676,
-0.01789395,
0.029737154,
-0.026266927,
-0.03567072,
0.07469971,
0.028393274,
-0.029625034,
-0.01053128,
0.09147493,
-0.018718474,
0.0012933073,
-0.021214467,
0.07475739,
-0.007773536,
0.048597455,
0.005216022,
-1.6914717e-08,
-0.05724563,
-0.0938908,
-0.034359876,
-0.037500683,
-0.020235153,
0.06142227,
-0.042273093,
-0.008759724,
-0.009908796,
0.016232042,
-0.014239323,
0.024709346,
-0.030538557,
-0.05391127,
-0.051778477,
0.01277344,
0.0036140021,
-0.012569925,
-0.025041323,
-0.0203936,
0.025865255,
0.010908398,
0.027834684,
0.009661084,
-0.006598172,
0.07860872,
0.054516125,
0.042956624,
-0.06275145,
-0.025701547,
0.08085865,
0.030041302,
0.02248997,
-0.0840195,
0.00029938898,
0.10966559,
0.118907265,
0.063014604,
0.037847042,
0.032069027,
-0.05345487,
-0.022730324,
0.0071888734,
0.037573762,
-0.020178014,
-0.090167634,
-0.07191704,
-0.02604166,
-0.043885063,
-0.14087014,
-0.017230472,
-0.012063355,
-0.046736836,
0.039048597,
-0.060394738,
0.022166032,
0.025670663,
0.022949725,
-0.06707243,
-0.014654702,
0.057985142,
0.10511708,
0.05698323,
-0.017205814
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 8,
"total_tokens": 8
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,423 @@
{
"test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_retrieve_contents[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is the content of test file 1"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.029406669,
0.08920982,
-0.11326726,
0.0065823817,
0.07725067,
-0.036890104,
0.030436223,
0.041454185,
-0.049156666,
0.018258564,
0.14662577,
0.01744915,
-0.012837422,
-0.06889876,
-0.039401636,
-0.038800705,
-0.08963421,
-0.059656583,
0.001375945,
0.045138627,
0.042796962,
0.053700265,
-0.035706885,
0.010138017,
0.060920056,
0.017344126,
-0.05633907,
0.063370295,
0.0021257724,
-0.083796844,
0.050487563,
0.047987595,
0.069071226,
0.049588464,
0.117036626,
0.05339311,
0.10129953,
-0.048230153,
-0.014987975,
0.0250915,
0.031392053,
-0.008863942,
0.0073650074,
-0.0009767569,
-0.016403567,
0.015523393,
-0.010998956,
-0.014870063,
0.0061682137,
-0.0017961137,
-0.022682818,
0.018210242,
-0.07757007,
-0.0015845516,
0.069547005,
0.000419109,
0.038414054,
0.005823485,
-0.028931383,
0.07009549,
-0.0018009909,
0.033516172,
-0.014593847,
0.03922457,
0.08240545,
-0.050596908,
-0.039732855,
-0.024425076,
-0.015055329,
-0.11705068,
-0.15979129,
-0.008256823,
-0.0100719705,
0.03266482,
0.0029998205,
0.0316428,
-0.094554916,
0.017661797,
0.058996264,
-0.119718134,
-0.027414676,
-0.09155906,
0.040038,
0.01091849,
-0.029446004,
0.10225186,
0.06583262,
-0.003439552,
-0.009694834,
0.016906522,
0.023685955,
-0.032616187,
-0.010238839,
0.07891618,
-0.007330681,
0.05238444,
0.00943625,
0.042121,
0.08491511,
0.049208272,
-0.01868227,
-0.013585418,
0.06727199,
0.084571496,
-0.103213035,
-0.08387524,
0.03641842,
-0.047227863,
0.057315867,
-0.04463932,
0.006783099,
-0.08934107,
-0.015040418,
-0.08107057,
0.013285569,
-0.060907867,
-0.042128306,
0.057306163,
-0.058711898,
0.04628304,
0.070194095,
-0.041729517,
-0.0338408,
-0.012369257,
-0.044708908,
-0.059450094,
0.08251312,
-3.443368e-33,
0.0121309515,
-0.11084454,
-0.020510655,
0.10916455,
0.033683147,
-0.02845083,
0.024345158,
0.034192592,
-0.08367815,
0.0064610844,
-0.00912456,
-0.0663567,
-0.0028754657,
0.008272698,
-0.09166764,
0.0089771375,
-0.03963948,
0.019947624,
-0.01321528,
-0.019034218,
0.051933073,
0.028107261,
-0.039153125,
-0.080395184,
-0.050503474,
0.02060341,
-0.012718284,
-0.046732575,
0.017907938,
-0.0028334607,
-0.011695137,
-0.05667005,
-0.043894444,
0.034919597,
0.022352098,
0.046777196,
0.045085873,
-0.008840106,
-0.06373453,
0.036720857,
0.012829601,
-0.035169926,
0.046209145,
-0.014361767,
0.03706697,
-0.056797564,
-0.06310496,
0.010818958,
0.047810175,
0.0029118094,
-0.003235893,
0.061511047,
0.072056666,
-0.03286638,
0.005070082,
0.021947902,
-0.017779002,
-0.022738373,
-0.021926457,
0.047074158,
0.010847615,
0.05539702,
-0.07119971,
0.033833236,
0.012342855,
-0.047586687,
-0.026776271,
-0.09885727,
0.10053448,
0.036877092,
-0.07049897,
-0.059692938,
0.016129492,
-0.0016443401,
-0.026804024,
-0.013527272,
-0.015385511,
0.055627547,
-0.060485132,
-0.055540122,
-0.04329072,
-0.07097361,
-0.04857043,
-0.03726256,
-0.09059366,
-0.036855534,
0.024561211,
-0.10113953,
0.056738112,
-0.10995085,
0.042282794,
0.014222368,
-0.07067843,
-0.05902307,
0.06426122,
1.6036318e-33,
0.037851896,
0.032911286,
-0.04029648,
-0.00049357174,
0.028011942,
0.048672136,
0.07279598,
-0.027471887,
-0.02847654,
0.114492,
0.001777095,
-0.009519909,
0.0025862327,
-0.056408145,
0.023462169,
-0.006209674,
-0.010567065,
-0.05877587,
-0.032393616,
0.011836781,
-0.038905054,
0.05516299,
0.09564333,
0.028543225,
-0.023832332,
-0.0015711841,
0.047049087,
0.03128219,
0.02811091,
0.007177092,
0.055283513,
0.06574452,
-0.1020208,
0.021213628,
0.020237882,
-0.10449357,
0.09608935,
-0.06253181,
0.015293753,
0.042053986,
0.06105009,
0.0909162,
0.018404186,
0.031023262,
0.03562763,
0.112073965,
0.10124763,
-0.007683015,
0.013140281,
-0.042280227,
0.051135287,
-0.02950743,
0.027794402,
-0.010734668,
-0.011067552,
0.058104575,
-0.009284788,
0.056184508,
-0.040822964,
0.010282754,
0.0374409,
0.054198533,
-0.061418086,
0.030569963,
0.0023648597,
-0.054184474,
-0.020570045,
0.012422129,
0.025696559,
-0.007607385,
-0.026194826,
-0.024159024,
0.0012979766,
-0.07461716,
0.051458035,
-0.004183808,
-0.040804464,
-0.023975441,
0.009455526,
-0.0018798193,
0.03668693,
-0.019319497,
-0.06195781,
0.06456675,
0.040328216,
-0.010790134,
0.013190221,
0.09067539,
-0.0051480443,
0.013312647,
-0.029548675,
0.07769003,
0.0027328292,
0.04533781,
-0.0017606319,
-1.661594e-08,
-0.040610366,
-0.09883059,
-0.05522113,
-0.02916469,
-0.019305382,
0.088138185,
-0.038325552,
-0.03327639,
-0.012629364,
0.006948921,
0.010438818,
0.026771523,
-0.040855426,
-0.03958403,
-0.051137064,
-0.016159322,
-0.020525131,
-0.023726366,
-0.013322245,
-0.008097836,
0.028000915,
0.02806969,
0.015645925,
-0.0043166955,
0.0054488196,
0.06720413,
0.068473674,
0.07172716,
-0.06339439,
-0.02540609,
0.08468492,
0.041936778,
0.021067144,
-0.07596481,
0.017143335,
0.1260291,
0.121315174,
0.08431059,
0.040587336,
0.036687315,
-0.04717,
-0.022659328,
-0.006820436,
0.005210712,
-0.033785924,
-0.08449115,
-0.0844501,
-0.03192747,
-0.036649443,
-0.13791409,
-0.036417518,
-0.00080547476,
-0.047578912,
0.038795993,
-0.06757743,
0.016941966,
0.036312684,
0.0125779435,
-0.058240637,
0.004471269,
0.03226526,
0.09821741,
0.053010236,
-0.016268
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 8,
"total_tokens": 8
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,423 @@
{
"test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_create_and_retrieve[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is batch test file 1"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.009745733,
0.03363038,
-0.10852256,
0.026609829,
-0.0060599064,
-0.020473678,
0.0692486,
0.032276765,
-0.11532835,
-0.0005207133,
0.11814916,
0.0119809555,
0.03685765,
-0.10744223,
-0.046515625,
0.0015449532,
-0.06319664,
-0.04640812,
-0.037318822,
-0.025718328,
-0.00026058854,
-0.011890766,
-0.050925612,
0.014111713,
0.029467698,
0.006379121,
-0.012013293,
-0.0024293982,
-0.044318773,
-0.08100101,
0.02009568,
0.055713937,
0.078816675,
0.054973654,
0.20367871,
-0.004309458,
0.03877001,
0.03825522,
-0.002538199,
-0.0007973801,
0.044761047,
-0.054529082,
-0.008856888,
-0.04078078,
0.011367262,
-0.022404457,
-0.06209053,
0.02558725,
-0.0034454092,
-0.03743928,
-0.062026348,
-0.030812219,
-0.034592565,
-0.014926672,
0.018588377,
0.013435887,
0.08169151,
0.053658403,
-0.03557856,
0.033325985,
-0.01637577,
-0.0222152,
-0.039247517,
0.00094368146,
0.10228945,
-0.04305617,
-0.052200828,
-0.02007385,
0.054805383,
-0.08231377,
-0.14736547,
0.048954617,
-0.0212168,
0.02872658,
-0.0671409,
0.021436114,
-0.023599947,
0.03677982,
0.010577411,
-0.0966004,
-0.06367233,
-0.10277648,
0.0273993,
-0.06292906,
-0.046344172,
0.039919835,
0.02682899,
0.025460077,
-0.013083559,
-0.002667712,
-0.016529463,
0.012605053,
-0.0064383023,
0.015841383,
-0.01710707,
0.12320292,
-0.0077660284,
0.05845043,
0.07362552,
0.038426086,
0.004742023,
-0.0155985365,
0.01418979,
0.07865995,
-0.026352523,
-0.037174653,
0.06787817,
-0.060126718,
0.06111402,
-0.034931272,
-0.009446326,
-0.006150886,
0.02892313,
-0.09361577,
0.0335364,
-0.09088912,
0.009241144,
0.07092964,
-0.08954648,
0.04494549,
0.040462427,
-0.04167353,
0.0076030386,
-0.0066417656,
-0.07275736,
-0.043690544,
0.07685007,
-1.0508795e-33,
-0.019583685,
-0.13087204,
-0.03574564,
0.070223756,
0.08133056,
-0.009436003,
0.046778366,
0.03478148,
-0.09441185,
-0.040857755,
-0.02127058,
-0.106959894,
0.024023255,
0.022780996,
-0.09042505,
-0.035755932,
0.011359196,
0.050059184,
0.0050815986,
-0.07676938,
0.05453651,
0.04191775,
-0.009206564,
-0.022437057,
-0.04617258,
-0.038608693,
-0.00036489012,
-0.025092375,
0.039146807,
-0.0072839926,
0.03675482,
-0.011301064,
-0.08863303,
0.059421506,
0.015851071,
0.033407707,
0.056883834,
-0.01203776,
0.027333334,
-0.009560535,
-0.05030555,
-0.009787559,
0.023205005,
-0.007937716,
0.003991047,
-0.036422852,
-0.06979188,
0.046075627,
0.056377746,
0.0071927872,
-0.00020658698,
0.017678235,
0.023745935,
-0.0031295705,
0.016370842,
0.027585855,
-0.03440131,
-0.05594279,
0.036442764,
0.03577988,
-0.005324585,
0.015240975,
-0.09071462,
0.072764605,
0.02343818,
-0.093097225,
0.05842133,
-0.061913762,
0.045556016,
0.07639311,
-0.035199754,
-0.009256856,
0.038682748,
-0.040795818,
0.017686425,
-0.025513103,
0.06860537,
0.085520275,
-0.1023457,
-0.0036474275,
-0.014826131,
-0.05045756,
-0.09065474,
-0.076476775,
-0.008538021,
-0.04111943,
-0.035473913,
-0.061549038,
0.114327826,
-0.09601482,
0.022990143,
0.0022396755,
-0.023026146,
-0.028128328,
0.07969127,
-4.1765383e-34,
0.07866384,
0.11484068,
0.016687382,
0.009315677,
0.01664128,
0.024303248,
0.046507504,
-0.043804675,
-0.09136995,
0.106353745,
-0.06948852,
0.018747667,
0.0053492193,
-0.033229355,
0.042339083,
-0.0017468681,
0.05323157,
0.0058223205,
-0.05331342,
0.016506517,
-0.02325185,
0.097519755,
-0.0045558517,
0.08866843,
-0.028221445,
-0.012007969,
-0.009742725,
0.061458003,
0.01574456,
-0.00039456616,
0.02444834,
0.065891184,
-0.054779086,
0.04863689,
0.043890025,
-0.062467597,
0.07615393,
0.0067509366,
0.019150084,
0.06994535,
0.027900916,
0.08902746,
-0.027433047,
0.031390887,
0.02271287,
0.08119532,
0.06855678,
0.0023552915,
-0.06764184,
0.00704173,
-0.034521427,
-0.053785548,
-0.03075216,
0.007947864,
-0.025317406,
-0.040664013,
0.036144093,
0.017730465,
-0.040179063,
0.013665757,
0.004815376,
0.009095556,
0.0072483593,
0.012753351,
-0.047865536,
-0.046072423,
-0.014048283,
0.031082962,
-0.034945205,
-0.023550391,
0.033062257,
-0.022966444,
0.007744228,
0.015939556,
-0.0012224894,
0.0010534802,
-0.015109,
-0.021597888,
-0.029862719,
0.03983828,
0.062536344,
0.0106168175,
-0.027220478,
0.02410377,
-0.0023566757,
0.085310005,
0.04843323,
0.090823516,
0.005126319,
0.020297319,
-0.01739127,
0.047677357,
0.11080086,
0.030030197,
0.029773563,
-1.5454503e-08,
-0.03580758,
-0.12177604,
0.019753791,
0.05854353,
-0.01590761,
0.085781366,
-0.09558486,
-0.0016744126,
0.00773199,
-0.04790156,
0.01175936,
0.006536077,
-0.032027386,
0.0031026274,
-0.07580574,
-0.039700802,
-0.00170645,
-0.070955865,
0.043680355,
0.029966798,
0.0039943648,
0.031923376,
0.08119928,
0.038820695,
0.013302812,
0.041675337,
0.044349737,
0.060403902,
-0.1058191,
-0.05287386,
0.050275758,
0.039101604,
0.0599918,
-0.025067834,
-0.019554066,
0.06748813,
0.12508559,
0.059007537,
-0.019899847,
-0.030194808,
-0.046559453,
0.034567222,
-0.021644907,
-0.03327634,
-0.0075667608,
-0.100658834,
-0.0639619,
-0.055270903,
-0.0111757815,
-0.11671873,
-0.07208087,
0.023208033,
0.027215267,
0.063635156,
-0.05858023,
0.020345282,
0.018325811,
-0.0036095325,
0.006916675,
0.06541716,
0.009575581,
0.046839867,
0.0070611075,
-0.09470841
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,39 @@
{
"test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_retrieve_contents[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-06T16:41:45.231544-07:00",
"size": 590204928,
"size_vram": 590204928,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,423 @@
{
"test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_create_and_retrieve[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is batch test file 0"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.020637129,
0.048547756,
-0.12516363,
0.01991118,
-0.006535745,
-0.017178575,
0.027727997,
0.032170568,
-0.07302972,
0.008939002,
0.11493648,
0.0058907545,
0.0058539375,
-0.077171296,
-0.06883132,
0.0039748913,
-0.046849657,
-0.072902456,
-0.010890429,
-0.0019311906,
-0.011614798,
0.003689495,
-0.03695609,
-0.009029024,
0.017461002,
-0.004713484,
-0.010254731,
-0.026636763,
-0.026125714,
-0.046913657,
0.017024228,
0.0713477,
0.07881179,
0.03789051,
0.21716279,
-0.0077837943,
0.04686894,
0.020414647,
7.314368e-05,
0.0103133675,
0.059848394,
-0.04321678,
-0.011937493,
-0.021149047,
0.021315353,
-0.00072822213,
-0.046116166,
-0.0046820445,
0.016943695,
-0.03249135,
-0.055184096,
4.1543382e-05,
-0.034172166,
-0.023247559,
0.020267941,
0.012827845,
0.065036125,
0.07180022,
-0.013490698,
0.06376413,
-0.017730094,
-0.01806601,
-0.034191083,
0.008955718,
0.098446764,
-0.0061265854,
-0.06815829,
-0.039525956,
0.060588058,
-0.094874755,
-0.11774928,
0.019538416,
-0.014697532,
0.04773719,
-0.061298393,
0.030337377,
-0.0022184649,
0.019007793,
0.024370821,
-0.07063359,
-0.07582954,
-0.10816809,
0.031845964,
-0.057830192,
-0.04169559,
0.0752806,
0.019289386,
0.028845867,
0.0077010663,
0.013930818,
-0.067987345,
0.012679873,
-0.07907268,
0.0143718915,
-0.021433424,
0.11880779,
-0.016258432,
0.07099568,
0.035778854,
0.028776454,
0.013304291,
-0.05192297,
0.026758345,
0.10282426,
-0.003306269,
-0.03239622,
0.083044045,
-0.0412691,
0.043435257,
-0.043423533,
-0.013239603,
-0.0029038454,
0.038365215,
-0.10401672,
0.012744224,
-0.122984126,
-0.008942817,
0.06162198,
-0.120285526,
0.043005254,
0.04814879,
-0.036352232,
-0.003885529,
-0.018503373,
-0.088186465,
-0.0031517749,
0.09290919,
-1.1695094e-33,
-0.015589721,
-0.13189551,
0.008088751,
0.06899503,
0.07353927,
-0.030646399,
0.05110342,
0.03081624,
-0.07850498,
-0.021147482,
0.00017823944,
-0.10502706,
0.030078856,
0.02572523,
-0.068158925,
-0.025015576,
0.021830637,
0.049748335,
0.01520941,
-0.080153145,
0.06796621,
0.021865685,
-0.034017574,
-0.030821111,
-0.048006665,
0.0005615041,
-0.0137883695,
-0.04500587,
0.015368256,
-0.0043663937,
0.037706476,
0.0049090013,
-0.06216566,
0.03060772,
0.030548712,
0.029262561,
0.020701125,
0.0056516766,
0.010610447,
0.019530762,
-0.05664136,
-0.022654066,
-0.0010107337,
-0.020805702,
-0.012242364,
-0.05591731,
-0.049421698,
0.024721064,
0.05803342,
0.010474127,
-0.008790625,
0.025362873,
0.020258408,
0.004368581,
-0.01018003,
0.012385932,
-0.037656736,
-0.05642639,
0.020923307,
0.022813153,
-0.005735433,
0.015326356,
-0.108707875,
0.048076265,
0.023256551,
-0.10311626,
0.061980195,
-0.07340407,
0.051583096,
0.07360003,
-0.029443117,
-0.014564469,
0.042043358,
-0.020252181,
0.0147808045,
-0.0285806,
0.07891856,
0.056849223,
-0.106308356,
0.0197874,
0.0269322,
-0.04749746,
-0.066681586,
-0.10474516,
0.012599429,
-0.056163482,
-0.04901015,
-0.04571026,
0.09704481,
-0.105899766,
0.044303197,
-0.020125533,
-0.0368709,
-0.015417924,
0.042297333,
-8.289866e-35,
0.07415767,
0.10998298,
-0.016995763,
0.01066263,
-0.0012327223,
0.028000232,
0.0714317,
-0.02320065,
-0.07778205,
0.11864239,
-0.016559754,
0.037961867,
0.02930022,
-0.008237686,
0.059777655,
0.008086454,
0.02075205,
0.025284613,
-0.055471037,
0.0073576584,
-0.013398135,
0.11896543,
-0.014611002,
0.07691816,
-0.019711656,
-0.01920917,
-0.004744884,
0.08173054,
0.019665759,
-0.013193461,
0.06215852,
0.07420406,
-0.073212065,
0.036052067,
0.07328616,
-0.057373393,
0.08346425,
0.018834447,
0.03309735,
0.041197047,
0.033917964,
0.09151449,
-0.051731598,
0.049615093,
0.01124018,
0.06661862,
0.07268375,
-0.013245848,
-0.039673895,
-0.012173254,
0.0017787582,
-0.05746287,
-0.013884767,
0.020205025,
-0.029692367,
-0.031010685,
0.0149556715,
0.026381323,
-0.025382591,
0.0074336748,
-0.00949915,
0.015655186,
-0.0012397208,
-0.0032508406,
-0.046632554,
-0.0030316226,
-0.007273208,
0.064231135,
-0.034431897,
-0.06433184,
0.045421343,
-0.010773523,
-0.017881984,
0.010312532,
-0.024369273,
-0.008478495,
-0.02457377,
-0.0263535,
-0.027263613,
0.047060315,
0.08128726,
0.0045517692,
-0.010821656,
0.026526682,
0.018961033,
0.059243083,
0.001561823,
0.09838158,
0.00822081,
0.008796511,
-0.0060577285,
0.028892087,
0.08253284,
0.049560018,
0.023363132,
-1.498271e-08,
-0.036891207,
-0.10629833,
0.030452948,
0.049268734,
-0.0030453752,
0.07413954,
-0.07043819,
-0.034285706,
-0.009679971,
-0.046219327,
0.013510038,
-0.018686565,
-0.048570327,
0.0028313443,
-0.06190722,
-0.053201936,
0.0060967463,
-0.043467365,
0.042226154,
0.03455835,
-0.0375257,
0.023590367,
0.054896712,
0.029878648,
0.019286606,
0.026097741,
0.06938145,
0.06272366,
-0.09566521,
-0.07481147,
0.025204772,
0.039396077,
0.036375154,
-0.01104443,
-0.028223084,
0.111878626,
0.13400707,
0.06680113,
-0.011737675,
-0.03585406,
-0.07978788,
0.032793757,
-0.0021075818,
-0.028365146,
-0.042218164,
-0.08132239,
-0.0753423,
-0.043771427,
-0.015633285,
-0.14193884,
-0.055949364,
0.025526602,
-0.023186589,
0.061106257,
-0.056208834,
0.00838827,
0.014720396,
-0.014650135,
-0.012830787,
0.08434067,
0.024660436,
0.05366935,
0.005782819,
-0.10599063
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 6,
"total_tokens": 6
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,423 @@
{
"test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_cancel[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is batch cancel test file 0 with substantial content"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.010706507,
0.11740309,
-0.070396945,
0.036590267,
0.03445541,
-0.037278067,
0.033794403,
-0.013823747,
-0.032249726,
0.039381154,
0.09738964,
0.043944314,
-0.015195914,
-0.08339148,
-0.12092182,
-0.0144716315,
-0.06525938,
0.008907217,
-0.016506711,
-0.011929026,
-0.0519942,
0.07381637,
0.028294124,
0.056386005,
0.028838597,
0.02860147,
-0.046813786,
-0.018329943,
-0.037620317,
-0.06344129,
0.037448265,
0.0807444,
0.08218735,
-0.018610513,
0.16465282,
0.006478139,
0.009052014,
0.024081843,
0.04604129,
-0.016105218,
0.050088186,
-0.014189308,
-0.055208918,
-0.024689473,
0.009216049,
0.0032953622,
-0.08004139,
-0.050898325,
0.030319132,
0.0038868543,
-0.03242241,
-0.008002084,
-0.05405017,
0.0034951256,
0.026613077,
-0.03749797,
0.074383445,
0.05947148,
-0.037571322,
0.07424358,
-0.031258598,
-0.010979168,
-0.115162514,
0.016076973,
0.12323825,
0.057677355,
-0.08872208,
-0.028623635,
0.05342226,
-0.060159575,
-0.07479101,
-0.01794232,
-0.0049816607,
0.08948416,
-0.042007502,
0.0925552,
-0.016678093,
0.013261441,
-0.0068968083,
0.00078877964,
-0.070652686,
-0.14053895,
0.054617904,
-0.064937904,
-0.036082774,
0.04364618,
0.039191015,
0.009325763,
0.055350192,
0.007441803,
-0.04520714,
0.0070686075,
0.029522296,
0.016590035,
-0.020568646,
0.083674796,
0.0076218233,
0.006881344,
0.013654858,
0.03697504,
0.04504176,
-0.012595865,
-0.006368664,
-0.006188894,
-0.02347456,
-0.014876863,
0.07330545,
-0.008524341,
0.03080002,
-0.079184264,
-0.002168809,
-0.04496155,
0.02353669,
-0.061784163,
0.019026963,
-0.034334134,
0.07823938,
0.086644776,
-0.100164026,
0.00979978,
0.043132447,
-0.00027732752,
-0.007950898,
-0.03439145,
-0.07176784,
-0.010847044,
0.10318583,
1.28398045e-33,
-0.057539165,
-0.10064088,
-0.036363184,
0.070467934,
0.12267441,
0.023121687,
0.036528632,
0.043095388,
-0.053614546,
0.034320176,
-0.015772322,
-0.07880764,
0.019716268,
0.017762613,
-0.094458655,
-0.08139035,
0.027233537,
0.07888667,
-0.024265131,
-0.054107342,
0.11021126,
-0.016241824,
-0.05417309,
-0.028439889,
-0.027373016,
-0.01668086,
-0.031238388,
-0.03203346,
0.017995317,
-0.011522754,
-0.0029258654,
0.022844825,
-0.019639384,
0.05111425,
-0.0015511515,
0.04084381,
0.0043716393,
-0.05789265,
0.024110112,
0.03920258,
-0.08151888,
-0.008190904,
-0.0645496,
-0.014420588,
0.00016276255,
-0.10466175,
-0.015631696,
-0.054435816,
0.03390489,
0.042083304,
0.041493565,
0.033552594,
0.027098974,
-0.035584476,
-0.025616122,
0.015369336,
0.025080213,
-0.047622968,
0.0076927147,
0.048611037,
0.07658855,
0.030115629,
-0.10192636,
0.009031788,
-0.026905872,
-0.07093241,
0.009540495,
-0.0967732,
0.006907292,
0.008907563,
-0.036709655,
-0.0074325944,
0.06927971,
-0.044891518,
-0.0022573345,
-0.05632572,
0.03744841,
0.026788702,
-0.00916575,
0.008179489,
0.08744597,
-0.046512436,
-0.061149366,
-0.13555244,
0.0010608839,
-0.06323009,
-0.039003603,
-0.07015582,
0.03916791,
-0.07763432,
-0.00032964678,
-0.026286542,
-0.053487364,
0.009920836,
0.104119115,
-1.9471978e-33,
0.04772588,
0.04490678,
-0.04262699,
0.03524018,
-0.003943472,
0.033365145,
0.06762878,
-0.021556355,
-0.043953415,
0.023543492,
0.005500359,
0.03756542,
0.025656395,
-0.014806406,
0.01845547,
0.015662882,
0.06915146,
0.010516805,
-0.08958506,
0.008974718,
-0.035460126,
0.05160542,
0.01763933,
0.067841165,
-0.02522728,
-0.022180483,
-0.085712284,
0.061407775,
0.07101853,
-0.0015686463,
0.055281166,
0.04126171,
-0.04599903,
-0.037977487,
0.09936549,
-0.064348385,
0.07501729,
0.06690245,
0.01264843,
0.011582279,
0.06661292,
0.083571374,
-0.05528334,
0.03757593,
0.043382253,
0.059041474,
0.056976013,
-0.02765602,
-0.00018057597,
-0.010140114,
-0.023275468,
-0.040977187,
-0.0051338123,
0.06462851,
-0.015096949,
-0.04108323,
0.013806998,
-0.013243718,
-0.04096836,
-0.021470992,
0.0037039437,
0.04606251,
0.027378108,
-0.009201031,
0.024913032,
0.027817363,
0.011912681,
0.072464235,
-0.04599433,
-0.033524342,
0.031872187,
-0.0017134893,
-0.030329237,
0.021338675,
0.050125677,
-0.006607719,
0.005844466,
-0.049508642,
2.296406e-05,
0.033044532,
0.07586271,
0.0094868485,
-0.0023229877,
0.063257135,
0.0073867897,
0.067748606,
-0.088573374,
0.06831021,
0.0047544846,
0.08063805,
-0.02170177,
0.020645779,
0.082571074,
0.039116666,
0.03906674,
-1.756136e-08,
-0.01928442,
-0.123927765,
0.0188664,
0.03889619,
0.003943178,
0.017261649,
-0.072421774,
0.010595731,
-0.032426827,
-0.07068102,
0.027171727,
-0.032465994,
-0.03428293,
0.00012704723,
-0.07441139,
-0.061249517,
0.003310212,
-0.030616615,
0.037538156,
0.013060206,
-0.02899822,
0.002607385,
0.023053044,
-0.008261543,
0.027366797,
0.041916996,
0.07509514,
0.093088634,
-0.05660954,
-0.10259794,
0.041243467,
-0.025973666,
0.013900956,
0.0023358895,
-0.075266555,
0.07490993,
0.14500652,
0.04697599,
-0.03860971,
0.009254478,
-0.06991552,
0.011762797,
0.02150895,
0.010407091,
-0.016874894,
-0.057741348,
-0.075219,
-0.07250321,
-0.03090426,
-0.110799745,
-0.024827298,
0.0065941666,
-0.027638538,
0.08827356,
-0.044589255,
-0.04193462,
0.021976525,
0.015851181,
-0.07105447,
0.106275305,
0.058465168,
0.0026831257,
-0.006616897,
-0.086507544
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 10,
"total_tokens": 10
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,39 @@
{
"test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_cancel[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-06T16:40:13.262640-07:00",
"size": 590204928,
"size_vram": 590204928,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,423 @@
{
"test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_cancel[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": [
"This is batch cancel test file 1 with substantial content"
],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.024848156,
0.10927085,
-0.0545053,
0.038470518,
0.046556868,
-0.034411646,
0.04878781,
-0.014318634,
-0.046015147,
0.044597667,
0.09629065,
0.058968317,
-0.007982022,
-0.10140896,
-0.10389055,
-0.019553911,
-0.07593768,
0.025729634,
-0.029175822,
-0.02637171,
-0.050457876,
0.066799924,
0.022711048,
0.06541894,
0.041600663,
0.030976223,
-0.056684654,
-0.0035002322,
-0.050632603,
-0.08931927,
0.040150054,
0.06798157,
0.08541512,
-0.0107848635,
0.15392521,
0.009335848,
0.010962297,
0.029146858,
0.047823314,
-0.026440151,
0.04159767,
-0.010160618,
-0.05779408,
-0.039702307,
-0.004494967,
-0.004617397,
-0.08862508,
-0.034483556,
0.024042498,
0.0051155766,
-0.0317056,
-0.01699217,
-0.053024635,
0.015636722,
0.03557156,
-0.039649993,
0.081902996,
0.06176357,
-0.05502012,
0.06357122,
-0.030193875,
-0.012515638,
-0.12543206,
0.012063709,
0.12448795,
0.040869392,
-0.07753088,
-0.021475459,
0.04500842,
-0.056871727,
-0.09496841,
-0.01180043,
-0.0017254521,
0.08008634,
-0.047713377,
0.08782804,
-0.02004271,
0.033268984,
-0.016207146,
-0.010731495,
-0.063805684,
-0.14302677,
0.0575187,
-0.06904251,
-0.037962824,
0.0182337,
0.042886198,
0.01039097,
0.044122625,
-0.0020459748,
-0.014757414,
0.0011372506,
0.07999029,
0.018020395,
-0.018433796,
0.07817236,
0.012330995,
0.007078602,
0.03731257,
0.03993665,
0.039117657,
0.0077354074,
-0.009170466,
-0.018691367,
-0.028763011,
-0.019665359,
0.062140632,
-0.020356707,
0.038877316,
-0.08305566,
0.00014209712,
-0.05700167,
0.021387467,
-0.054998472,
0.03538585,
-0.023105556,
0.089621656,
0.09418147,
-0.08390289,
0.009763535,
0.043676704,
-0.0022283366,
0.00070641236,
-0.03374215,
-0.07274797,
-0.034256138,
0.09228734,
1.2329422e-33,
-0.06229734,
-0.10348473,
-0.05939012,
0.07817319,
0.12856846,
0.03253048,
0.03706221,
0.03843275,
-0.06781762,
0.027851813,
-0.03286515,
-0.07305933,
0.011496317,
0.016992282,
-0.10859345,
-0.089275,
0.02053902,
0.07540007,
-0.030434899,
-0.057486024,
0.1028371,
-0.011332772,
-0.040277272,
-0.022627348,
-0.029583039,
-0.042487655,
-0.01710431,
-0.028937005,
0.034644134,
-0.015131404,
-0.005402634,
0.0111823045,
-0.024323324,
0.061144948,
-0.0068504023,
0.04550556,
0.017341396,
-0.063010655,
0.033939265,
0.029030005,
-0.07075115,
0.0076140417,
-0.056033216,
-0.01839173,
0.006444027,
-0.10148905,
-0.024238782,
-0.045753844,
0.029873326,
0.03732028,
0.05342056,
0.024428835,
0.03200607,
-0.045322895,
-0.009412481,
0.01895284,
0.026068604,
-0.043451786,
0.017836504,
0.060751975,
0.0770648,
0.037520513,
-0.094844334,
0.018022675,
-0.028010713,
-0.05970307,
0.0042470302,
-0.08537647,
0.0025366507,
0.0059753954,
-0.040670317,
-0.008420785,
0.070101276,
-0.05581281,
0.009997155,
-0.053269707,
0.030278698,
0.034753144,
-0.0069992156,
-0.0018294669,
0.052869115,
-0.047554925,
-0.07009094,
-0.12028551,
-0.016411684,
-0.0558196,
-0.026485136,
-0.07406597,
0.052336086,
-0.07966716,
-0.009600498,
-0.016012779,
-0.04670444,
0.0040856744,
0.13087922,
-1.9130171e-33,
0.04951988,
0.04144521,
-0.030660233,
0.02966906,
-0.0019053655,
0.038034633,
0.053598672,
-0.03873592,
-0.050682254,
0.0163216,
-0.018117629,
0.02705123,
0.014957701,
-0.029251544,
0.010732444,
0.01150037,
0.08527361,
0.000666767,
-0.09031944,
0.007236525,
-0.0394124,
0.032647807,
0.029387591,
0.0696317,
-0.028400488,
-0.019728381,
-0.08580391,
0.050916594,
0.07555233,
0.0013333871,
0.036405865,
0.03485496,
-0.035891958,
-0.03518406,
0.08422707,
-0.07100648,
0.066512264,
0.0566844,
0.005254722,
0.026210023,
0.06271422,
0.07715752,
-0.042685844,
0.029498853,
0.048694577,
0.06829996,
0.05471948,
-0.014717811,
-0.0084376065,
-0.007800526,
-0.033968475,
-0.035792083,
-0.01680357,
0.056615632,
-0.008940466,
-0.044396702,
0.033141203,
-0.020710811,
-0.052891865,
-0.012946567,
0.013425288,
0.045469046,
0.02655372,
-7.159544e-06,
0.033383444,
0.012771919,
0.0050781234,
0.05739414,
-0.05292731,
-0.009027621,
0.019719183,
-0.0046205786,
-0.012921344,
0.021115582,
0.063510135,
0.006540324,
0.008657973,
-0.044172782,
-0.0010352373,
0.025917202,
0.07357742,
0.012915724,
-0.010159995,
0.05862044,
0.0032137444,
0.08368076,
-0.06552963,
0.06294139,
0.004963379,
0.08497223,
-0.030302247,
0.028541481,
0.103464715,
0.03432187,
0.039947473,
-1.757192e-08,
-0.020163277,
-0.12507844,
0.015846072,
0.038265407,
-0.0031526515,
0.01804952,
-0.0817553,
0.030486222,
-0.02073271,
-0.069118954,
0.0252006,
-0.016496325,
-0.018695008,
-0.0063493066,
-0.08448383,
-0.05474651,
0.008191211,
-0.04699509,
0.03820692,
0.019186925,
-0.006977571,
-0.0002934883,
0.030278133,
-0.009153849,
0.030300315,
0.04737054,
0.06026962,
0.09765302,
-0.05529498,
-0.09553832,
0.06008278,
-0.025960611,
0.034287665,
-0.012333093,
-0.07106284,
0.05141244,
0.14179605,
0.04709826,
-0.049292527,
0.014455253,
-0.047851674,
0.011403938,
0.014072481,
0.010494679,
-0.0009859774,
-0.06089218,
-0.07293921,
-0.07961594,
-0.03404924,
-0.10086713,
-0.031331882,
0.0042822976,
-0.0045380252,
0.09583955,
-0.044172354,
-0.034359995,
0.023726532,
0.02167657,
-0.06509328,
0.09268318,
0.055370033,
0.003980954,
-0.0053826002,
-0.07774321
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 10,
"total_tokens": 10
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,20 @@
{
"test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_file_batch_create_and_retrieve[client_with_models-ollama/llama3.2:3b-instruct-fp16-None-ollama/all-minilm:l6-v2-None-384]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": []
}
},
"is_streaming": false
}
}

View file

@ -902,3 +902,290 @@ def test_openai_vector_store_search_modes(llama_stack_client, client_with_models
search_mode=search_mode,
)
assert search_response is not None


def test_openai_vector_store_file_batch_create_and_retrieve(compat_client_with_empty_stores, client_with_models):
"""Test creating and retrieving a vector store file batch."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
# Create a vector store
vector_store = compat_client.vector_stores.create(name="batch_test_store")
# Create multiple files
file_ids = []
for i in range(2):
with BytesIO(f"This is batch test file {i}".encode()) as file_buffer:
file_buffer.name = f"batch_test_{i}.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants")
file_ids.append(file.id)
# Create a file batch
batch = compat_client.vector_stores.file_batches.create(
vector_store_id=vector_store.id,
file_ids=file_ids,
)
assert batch is not None
assert batch.object == "vector_store.file_batch"
assert batch.vector_store_id == vector_store.id
assert batch.status in ["in_progress", "completed"]
assert batch.file_counts.total == len(file_ids)
assert hasattr(batch, "id")
assert hasattr(batch, "created_at")
# Wait for batch processing to complete
max_retries = 60 # 60 seconds max wait (increased for file processing delays)
retries = 0
retrieved_batch = None
while retries < max_retries:
retrieved_batch = compat_client.vector_stores.file_batches.retrieve(
vector_store_id=vector_store.id,
batch_id=batch.id,
)
if retrieved_batch.status in ["completed", "failed"]:
break
time.sleep(1)
retries += 1
assert retrieved_batch is not None
assert retrieved_batch.id == batch.id
assert retrieved_batch.vector_store_id == vector_store.id
assert retrieved_batch.object == "vector_store.file_batch"
assert retrieved_batch.file_counts.total == len(file_ids)
assert retrieved_batch.status == "completed" # Should be completed after processing
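

# NOTE: The polling loop above reappears in several tests below. The helper
# sketched here is illustration only; `wait_for_batch_completion` is a
# hypothetical name, not part of this patch, and it assumes the module's
# existing `time` import.
def wait_for_batch_completion(compat_client, vector_store_id, batch_id, timeout=60):
    """Poll a file batch once per second until it reaches a terminal state."""
    batch = compat_client.vector_stores.file_batches.retrieve(
        vector_store_id=vector_store_id,
        batch_id=batch_id,
    )
    for _ in range(timeout):
        if batch.status in ["completed", "failed", "cancelled"]:
            break
        time.sleep(1)
        batch = compat_client.vector_stores.file_batches.retrieve(
            vector_store_id=vector_store_id,
            batch_id=batch_id,
        )
    return batch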


def test_openai_vector_store_file_batch_list_files(compat_client_with_empty_stores, client_with_models):
"""Test listing files in a vector store file batch."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
# Create a vector store
vector_store = compat_client.vector_stores.create(name="batch_list_test_store")
# Create multiple files
file_ids = []
for i in range(2):
with BytesIO(f"This is batch list test file {i}".encode()) as file_buffer:
file_buffer.name = f"batch_list_test_{i}.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants")
file_ids.append(file.id)
# Create a file batch
batch = compat_client.vector_stores.file_batches.create(
vector_store_id=vector_store.id,
file_ids=file_ids,
)
# Wait for batch processing to complete
max_retries = 60 # 60 seconds max wait (increased for file processing delays)
retries = 0
while retries < max_retries:
retrieved_batch = compat_client.vector_stores.file_batches.retrieve(
vector_store_id=vector_store.id,
batch_id=batch.id,
)
if retrieved_batch.status in ["completed", "failed"]:
break
time.sleep(1)
retries += 1
# List all files in the batch
files_response = compat_client.vector_stores.file_batches.list_files(
vector_store_id=vector_store.id,
batch_id=batch.id,
)
assert files_response is not None
assert files_response.object == "list"
assert hasattr(files_response, "data")
assert len(files_response.data) == len(file_ids)
# Verify all files are in the response
response_file_ids = {file.id for file in files_response.data}
assert response_file_ids == set(file_ids)
    # Test pagination with a limit larger than the file count: both files fit on one page
limited_response = compat_client.vector_stores.file_batches.list_files(
vector_store_id=vector_store.id,
batch_id=batch.id,
limit=3,
)
assert len(limited_response.data) == 2
assert limited_response.has_more is False
# Test pagination with after cursor
first_page = compat_client.vector_stores.file_batches.list_files(
vector_store_id=vector_store.id,
batch_id=batch.id,
limit=2,
)
second_page = compat_client.vector_stores.file_batches.list_files(
vector_store_id=vector_store.id,
batch_id=batch.id,
limit=2,
after=first_page.data[-1].id,
)
assert len(first_page.data) == 2
    assert len(second_page.data) <= 3  # loose upper bound; with only 2 files the second page should be empty
# Ensure no overlap between pages
first_page_ids = {file.id for file in first_page.data}
second_page_ids = {file.id for file in second_page.data}
assert first_page_ids.isdisjoint(second_page_ids)
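

# The assertions above drive `limit` and `after` directly. For reference, a
# full cursor walk over a batch could look like this sketch; `iter_batch_files`
# is a hypothetical helper for illustration, not part of this patch.
def iter_batch_files(compat_client, vector_store_id, batch_id, page_size=2):
    """Yield every file in a batch by following the `after` cursor."""
    after = None
    while True:
        kwargs = {"vector_store_id": vector_store_id, "batch_id": batch_id, "limit": page_size}
        if after is not None:
            kwargs["after"] = after
        page = compat_client.vector_stores.file_batches.list_files(**kwargs)
        yield from page.data
        if not page.has_more or not page.data:
            break
        after = page.data[-1].id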


def test_openai_vector_store_file_batch_cancel(compat_client_with_empty_stores, client_with_models):
"""Test cancelling a vector store file batch."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
# Create a vector store
vector_store = compat_client.vector_stores.create(name="batch_cancel_test_store")
# Create a batch to test cancellation
file_ids = []
for i in range(2): # Batch size that allows time for cancellation
with BytesIO(f"This is batch cancel test file {i} with substantial content".encode()) as file_buffer:
file_buffer.name = f"batch_cancel_test_{i}.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants")
file_ids.append(file.id)
# Create a file batch
batch = compat_client.vector_stores.file_batches.create(
vector_store_id=vector_store.id,
file_ids=file_ids,
)
try:
# Cancel the batch immediately after creation
cancelled_batch = compat_client.vector_stores.file_batches.cancel(
vector_store_id=vector_store.id,
batch_id=batch.id,
)
assert cancelled_batch is not None
assert cancelled_batch.id == batch.id
assert cancelled_batch.vector_store_id == vector_store.id
assert cancelled_batch.status == "cancelled"
assert cancelled_batch.object == "vector_store.file_batch"
except Exception:
# If cancellation fails (e.g., batch completed too quickly),
# verify the batch reached completion instead
final_batch = compat_client.vector_stores.file_batches.retrieve(
vector_store_id=vector_store.id,
batch_id=batch.id,
)
assert final_batch.status in ["completed", "cancelled"]
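# NOTE (illustrative sketch, not part of the committed tests): the try/except
# above guards a race; a small batch may finish before the cancel request
# lands. A hypothetical helper expressing "cancel if still running":
def cancel_if_running(client, vector_store_id, batch_id):
    """Try to cancel a batch; fall back to its terminal state if it already finished."""
    try:
        return client.vector_stores.file_batches.cancel(
            vector_store_id=vector_store_id,
            batch_id=batch_id,
        )
    except Exception:
        # Cancellation can fail once the batch has reached a terminal state.
        return client.vector_stores.file_batches.retrieve(
            vector_store_id=vector_store_id,
            batch_id=batch_id,
        )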
def test_openai_vector_store_file_batch_retrieve_contents(compat_client_with_empty_stores, client_with_models):
"""Test retrieving file contents after file batch processing."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
# Create a vector store
vector_store = compat_client.vector_stores.create(name="batch_contents_test_store")
# Create multiple files with known content
file_data = [
("test_file_1.txt", b"This is the content of test file 1"),
("test_file_2.txt", b"This is the content of test file 2"),
]
file_ids = []
for filename, content in file_data:
with BytesIO(content) as file_buffer:
file_buffer.name = filename
file = compat_client.files.create(file=file_buffer, purpose="assistants")
file_ids.append(file.id)
# Create a file batch
batch = compat_client.vector_stores.file_batches.create(
vector_store_id=vector_store.id,
file_ids=file_ids,
)
# Wait for batch processing to complete
max_retries = 60 # 60 seconds max wait (increased for file processing delays)
retries = 0
while retries < max_retries:
retrieved_batch = compat_client.vector_stores.file_batches.retrieve(
vector_store_id=vector_store.id,
batch_id=batch.id,
)
if retrieved_batch.status in ["completed", "failed"]:
break
time.sleep(1)
retries += 1
assert retrieved_batch.status == "completed"
# Retrieve file contents for each file in the batch
for i, file_id in enumerate(file_ids):
file_contents = compat_client.vector_stores.files.content(
vector_store_id=vector_store.id,
file_id=file_id,
)
assert file_contents is not None
assert file_contents.filename == file_data[i][0]
assert len(file_contents.content) > 0
# Verify the content matches what we uploaded
content_text = (
file_contents.content[0].text
if hasattr(file_contents.content[0], "text")
else file_contents.content[0]["text"]
)
assert file_data[i][1].decode("utf-8") in content_text
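# NOTE (illustrative sketch, not part of the committed tests): the polling loop
# above is repeated in several tests; a shared helper could consolidate it.
# Assumes the same compat client surface used throughout; `time` is already
# imported at the top of this module.
def wait_for_batch(client, vector_store_id, batch_id, timeout_s=60):
    """Poll a file batch until it reaches a terminal state or the timeout expires."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        batch = client.vector_stores.file_batches.retrieve(
            vector_store_id=vector_store_id,
            batch_id=batch_id,
        )
        if batch.status in ("completed", "failed", "cancelled"):
            return batch
        time.sleep(1)
    raise TimeoutError(f"Batch {batch_id} did not reach a terminal state within {timeout_s}s")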
def test_openai_vector_store_file_batch_error_handling(compat_client_with_empty_stores, client_with_models):
"""Test error handling for file batch operations."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
# Create a vector store
vector_store = compat_client.vector_stores.create(name="batch_error_test_store")
# Test with invalid file IDs (should handle gracefully)
file_ids = ["invalid_file_id_1", "invalid_file_id_2"]
batch = compat_client.vector_stores.file_batches.create(
vector_store_id=vector_store.id,
file_ids=file_ids,
)
assert batch is not None
assert batch.file_counts.total == len(file_ids)
# Invalid files should eventually be marked as failed; exact timing varies by
# implementation, so only sanity-check that the counter stays within bounds
assert 0 <= batch.file_counts.failed <= batch.file_counts.total
# Determine expected errors based on client type
if isinstance(compat_client, LlamaStackAsLibraryClient):
errors = ValueError
else:
errors = (BadRequestError, OpenAIBadRequestError)
# Test retrieving non-existent batch
with pytest.raises(errors): # Should raise an error for non-existent batch
compat_client.vector_stores.file_batches.retrieve(
vector_store_id=vector_store.id,
batch_id="non_existent_batch_id",
)
# Test operations on non-existent vector store
with pytest.raises(errors): # Should raise an error for non-existent vector store
compat_client.vector_stores.file_batches.create(
vector_store_id="non_existent_vector_store",
file_ids=["any_file_id"],
)


@@ -8,6 +8,7 @@
import pytest
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseAnnotationFileCitation,
OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText,
@@ -35,6 +36,7 @@ from llama_stack.apis.inference import (
OpenAIUserMessageParam,
)
from llama_stack.providers.inline.agents.meta_reference.responses.utils import (
_extract_citations_from_text,
convert_chat_choice_to_response_message,
convert_response_content_to_chat_content,
convert_response_input_to_chat_messages,
@@ -340,3 +342,26 @@ class TestIsFunctionToolCall:
result = is_function_tool_call(tool_call, tools)
assert result is False
class TestExtractCitationsFromText:
def test_extract_citations_and_annotations(self):
text = "Start [not-a-file]. New source <|file-abc123|>. "
text += "Other source <|file-def456|>? Repeat source <|file-abc123|>! No citation."
file_mapping = {"file-abc123": "doc1.pdf", "file-def456": "doc2.txt"}
annotations, cleaned_text = _extract_citations_from_text(text, file_mapping)
expected_annotations = [
OpenAIResponseAnnotationFileCitation(file_id="file-abc123", filename="doc1.pdf", index=30),
OpenAIResponseAnnotationFileCitation(file_id="file-def456", filename="doc2.txt", index=44),
OpenAIResponseAnnotationFileCitation(file_id="file-abc123", filename="doc1.pdf", index=59),
]
expected_clean_text = "Start [not-a-file]. New source. Other source? Repeat source! No citation."
assert cleaned_text == expected_clean_text
assert annotations == expected_annotations
# OpenAI cites at the end of the sentence
assert cleaned_text[expected_annotations[0].index] == "."
assert cleaned_text[expected_annotations[1].index] == "?"
assert cleaned_text[expected_annotations[2].index] == "!"
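# NOTE (illustrative sketch, not the actual implementation): a simplified
# citation extractor consistent with the expectations above. It assumes
# markers take the form " <|file-...|>" and that each annotation's index
# points at the character (typically the sentence-ending punctuation) that
# immediately follows the removed marker in the cleaned text.
import re

_CITATION_RE = re.compile(r"\s*<\|(file-[A-Za-z0-9_-]+)\|>")

def extract_citations_sketch(text, file_mapping):
    """Return (annotations, cleaned_text); annotations are (file_id, filename, index) tuples."""
    annotations, parts, last_end = [], [], 0
    for match in _CITATION_RE.finditer(text):
        parts.append(text[last_end : match.start()])
        index = sum(len(p) for p in parts)  # position where the next char lands after removal
        file_id = match.group(1)
        if file_id in file_mapping:
            annotations.append((file_id, file_mapping[file_id], index))
        last_end = match.end()
    parts.append(text[last_end:])
    return annotations, "".join(parts)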
