feat: Adding OpenAI Compatible Prompts API

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
2025-12-17 09:49:46 +00:00 · 2025-09-03 14:14:54 -04:00 · 2025-09-03 14:14:54 -04:00 · 8b00883abd
commit 8b00883abd
parent 30117dea22
181 changed files with 21356 additions and 10332 deletions
--- a/.gitignore
+++ b/.gitignore
@ -26,5 +26,7 @@ venv/
 pytest-report.xml
 .coverage
 .python-version
+AGENTS.md
+server.log
 CLAUDE.md
 .claude/
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -86,7 +86,7 @@ repos:
        language: python
        pass_filenames: false
        require_serial: true
-        files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+        files: ^llama_stack/distributions/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
      - id: provider-codegen
        name: Provider Codegen
        additional_dependencies:
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -633,6 +633,80 @@
                }
            }
        },
+        "/v1/prompts": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A ListPromptsResponse containing all prompts.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListPromptsResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Prompts"
+                ],
+                "description": "List all prompts.",
+                "parameters": []
+            },
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The created Prompt resource.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Prompt"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Prompts"
+                ],
+                "description": "Create a new prompt.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/CreatePromptRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
        "/v1/agents/{agent_id}": {
            "get": {
                "responses": {
@ -901,6 +975,143 @@
                ]
            }
        },
+        "/v1/prompts/{prompt_id}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A Prompt resource.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Prompt"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Prompts"
+                ],
+                "description": "Get a prompt by its identifier and optional version.",
+                "parameters": [
+                    {
+                        "name": "prompt_id",
+                        "in": "path",
+                        "description": "The identifier of the prompt to get.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "version",
+                        "in": "query",
+                        "description": "The version of the prompt to get (defaults to latest).",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            },
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The updated Prompt resource with incremented version.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Prompt"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Prompts"
+                ],
+                "description": "Update an existing prompt (increments version).",
+                "parameters": [
+                    {
+                        "name": "prompt_id",
+                        "in": "path",
+                        "description": "The identifier of the prompt to update.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/UpdatePromptRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            },
+            "delete": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Prompts"
+                ],
+                "description": "Delete a prompt.",
+                "parameters": [
+                    {
+                        "name": "prompt_id",
+                        "in": "path",
+                        "description": "The identifier of the prompt to delete.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
        "/v1/inference/embeddings": {
            "post": {
                "responses": {
@ -4129,7 +4340,7 @@
                "tags": [
                    "Files"
                ],
-                "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.",
+                "description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = \"created_at\", expires_after[seconds] = <int>. Seconds must be between 3600 and 2592000 (1 hour to 30 days).",
                "parameters": [],
                "requestBody": {
                    "content": {
@ -4143,11 +4354,33 @@
                                    },
                                    "purpose": {
                                        "$ref": "#/components/schemas/OpenAIFilePurpose"
+                                    },
+                                    "expires_after_anchor": {
+                                        "oneOf": [
+                                            {
+                                                "type": "string"
+                                            },
+                                            {
+                                                "type": "null"
+                                            }
+                                        ]
+                                    },
+                                    "expires_after_seconds": {
+                                        "oneOf": [
+                                            {
+                                                "type": "integer"
+                                            },
+                                            {
+                                                "type": "null"
+                                            }
+                                        ]
                                    }
                                },
                                "required": [
                                    "file",
-                                    "purpose"
+                                    "purpose",
+                                    "expires_after_anchor",
+                                    "expires_after_seconds"
                                ]
                            }
                        }
@ -4985,6 +5218,59 @@
                }
            }
        },
+        "/v1/prompts/{prompt_id}/default-version": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The prompt with the specified version now set as default.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Prompt"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Prompts"
+                ],
+                "description": "Set which version of a prompt is returned by default by get_prompt (i.e., treated as latest).",
+                "parameters": [
+                    {
+                        "name": "prompt_id",
+                        "in": "path",
+                        "description": "The identifier of the prompt.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/SetDefaultVersionRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
        "/v1/post-training/supervised-fine-tune": {
            "post": {
                "responses": {
@ -9648,6 +9934,58 @@
                ],
                "title": "OpenAIResponseObjectStreamResponseWebSearchCallSearching"
            },
+            "CreatePromptRequest": {
+                "type": "object",
+                "properties": {
+                    "prompt": {
+                        "type": "string",
+                        "description": "The prompt text content with variable placeholders."
+                    },
+                    "variables": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        },
+                        "description": "Dictionary of variable names to their default values."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "prompt"
+                ],
+                "title": "CreatePromptRequest"
+            },
+            "Prompt": {
+                "type": "object",
+                "properties": {
+                    "prompt": {
+                        "type": "string",
+                        "description": "The system prompt text with variable placeholders. Variables are only supported when using the Responses API."
+                    },
+                    "version": {
+                        "type": "string",
+                        "description": "Version string (an integer starting at 1, cast to a string and incremented on save)"
+                    },
+                    "prompt_id": {
+                        "type": "string",
+                        "description": "Unique identifier formatted as 'pmpt_<48-digit-hash>'"
+                    },
+                    "variables": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        },
+                        "description": "Dictionary of prompt variable names and values"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "version",
+                    "prompt_id"
+                ],
+                "title": "Prompt",
+                "description": "A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack."
+            },
            "OpenAIDeleteResponseObject": {
                "type": "object",
                "properties": {
@ -10274,7 +10612,8 @@
                            "scoring_function",
                            "benchmark",
                            "tool",
-                            "tool_group"
+                            "tool_group",
+                            "prompt"
                        ],
                        "const": "benchmark",
                        "default": "benchmark",
@ -10901,7 +11240,8 @@
                            "scoring_function",
                            "benchmark",
                            "tool",
-                            "tool_group"
+                            "tool_group",
+                            "prompt"
                        ],
                        "const": "dataset",
                        "default": "dataset",
@ -11051,7 +11391,8 @@
                            "scoring_function",
                            "benchmark",
                            "tool",
-                            "tool_group"
+                            "tool_group",
+                            "prompt"
                        ],
                        "const": "model",
                        "default": "model",
@ -11316,7 +11657,8 @@
                            "scoring_function",
                            "benchmark",
                            "tool",
-                            "tool_group"
+                            "tool_group",
+                            "prompt"
                        ],
                        "const": "scoring_function",
                        "default": "scoring_function",
@ -11424,7 +11766,8 @@
                            "scoring_function",
                            "benchmark",
                            "tool",
-                            "tool_group"
+                            "tool_group",
+                            "prompt"
                        ],
                        "const": "shield",
                        "default": "shield",
@ -11669,7 +12012,8 @@
                            "scoring_function",
                            "benchmark",
                            "tool",
-                            "tool_group"
+                            "tool_group",
+                            "prompt"
                        ],
                        "const": "tool",
                        "default": "tool",
@ -11751,7 +12095,8 @@
                            "scoring_function",
                            "benchmark",
                            "tool",
-                            "tool_group"
+                            "tool_group",
+                            "prompt"
                        ],
                        "const": "tool_group",
                        "default": "tool_group",
@ -12045,7 +12390,8 @@
                            "scoring_function",
                            "benchmark",
                            "tool",
-                            "tool_group"
+                            "tool_group",
+                            "prompt"
                        ],
                        "const": "vector_db",
                        "default": "vector_db",
@ -12860,6 +13206,23 @@
                "title": "OpenAIResponseObjectWithInput",
                "description": "OpenAI response object extended with input context information."
            },
+            "ListPromptsResponse": {
+                "type": "object",
+                "properties": {
+                    "data": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/Prompt"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "data"
+                ],
+                "title": "ListPromptsResponse",
+                "description": "Response model to list prompts."
+            },
            "ListProvidersResponse": {
                "type": "object",
                "properties": {
@ -17106,6 +17469,20 @@
                "title": "ScoreBatchResponse",
                "description": "Response from batch scoring operations on datasets."
            },
+            "SetDefaultVersionRequest": {
+                "type": "object",
+                "properties": {
+                    "version": {
+                        "type": "string",
+                        "description": "The version to set as default."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "version"
+                ],
+                "title": "SetDefaultVersionRequest"
+            },
            "AlgorithmConfig": {
                "oneOf": [
                    {
@ -17390,6 +17767,27 @@
                "title": "SyntheticDataGenerationResponse",
                "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
            },
+            "UpdatePromptRequest": {
+                "type": "object",
+                "properties": {
+                    "prompt": {
+                        "type": "string",
+                        "description": "The updated prompt text content."
+                    },
+                    "variables": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        },
+                        "description": "Updated dictionary of variable names to their default values."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "prompt"
+                ],
+                "title": "UpdatePromptRequest"
+            },
            "VersionInfo": {
                "type": "object",
                "properties": {
@ -17515,6 +17913,10 @@
        {
            "name": "PostTraining (Coming Soon)"
        },
+        {
+            "name": "Prompts",
+            "x-displayName": "Protocol for prompt management operations."
+        },
        {
            "name": "Providers",
            "x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
@ -17565,6 +17967,7 @@
                "Inspect",
                "Models",
                "PostTraining (Coming Soon)",
+                "Prompts",
                "Providers",
                "Safety",
                "Scoring",
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -427,6 +427,58 @@ paths:
            schema:
              $ref: '#/components/schemas/CreateOpenaiResponseRequest'
        required: true
+  /v1/prompts:
+    get:
+      responses:
+        '200':
+          description: >-
+            A ListPromptsResponse containing all prompts.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListPromptsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      description: List all prompts.
+      parameters: []
+    post:
+      responses:
+        '200':
+          description: The created Prompt resource.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      description: Create a new prompt.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreatePromptRequest'
+        required: true
  /v1/agents/{agent_id}:
    get:
      responses:
@ -616,6 +668,103 @@ paths:
          required: true
          schema:
            type: string
+  /v1/prompts/{prompt_id}:
+    get:
+      responses:
+        '200':
+          description: A Prompt resource.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      description: >-
+        Get a prompt by its identifier and optional version.
+      parameters:
+        - name: prompt_id
+          in: path
+          description: The identifier of the prompt to get.
+          required: true
+          schema:
+            type: string
+        - name: version
+          in: query
+          description: >-
+            The version of the prompt to get (defaults to latest).
+          required: false
+          schema:
+            type: string
+    post:
+      responses:
+        '200':
+          description: >-
+            The updated Prompt resource with incremented version.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      description: >-
+        Update an existing prompt (increments version).
+      parameters:
+        - name: prompt_id
+          in: path
+          description: The identifier of the prompt to update.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/UpdatePromptRequest'
+        required: true
+    delete:
+      responses:
+        '200':
+          description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      description: Delete a prompt.
+      parameters:
+        - name: prompt_id
+          in: path
+          description: The identifier of the prompt to delete.
+          required: true
+          schema:
+            type: string
  /v1/inference/embeddings:
    post:
      responses:
@ -2933,6 +3082,10 @@ paths:
        - file: The File object (not file name) to be uploaded.

        - purpose: The intended purpose of the uploaded file.
+
+        - expires_after: Optional form values describing expiration for the file.
+        Expected expires_after[anchor] = "created_at", expires_after[seconds] = <int>.
+        Seconds must be between 3600 and 2592000 (1 hour to 30 days).
      parameters: []
      requestBody:
        content:
@ -2945,9 +3098,19 @@ paths:
                  format: binary
                purpose:
                  $ref: '#/components/schemas/OpenAIFilePurpose'
+                expires_after_anchor:
+                  oneOf:
+                    - type: string
+                    - type: 'null'
+                expires_after_seconds:
+                  oneOf:
+                    - type: integer
+                    - type: 'null'
              required:
                - file
                - purpose
+                - expires_after_anchor
+                - expires_after_seconds
        required: true
  /v1/openai/v1/models:
    get:
@ -3532,6 +3695,43 @@ paths:
            schema:
              $ref: '#/components/schemas/ScoreBatchRequest'
        required: true
+  /v1/prompts/{prompt_id}/default-version:
+    post:
+      responses:
+        '200':
+          description: >-
+            The prompt with the specified version now set as default.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Prompts
+      description: >-
+        Set which version of a prompt is returned by default by get_prompt (i.e., treated as latest).
+      parameters:
+        - name: prompt_id
+          in: path
+          description: The identifier of the prompt.
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/SetDefaultVersionRequest'
+        required: true
  /v1/post-training/supervised-fine-tune:
    post:
      responses:
@ -7134,6 +7334,53 @@ components:
        - type
      title: >-
        OpenAIResponseObjectStreamResponseWebSearchCallSearching
+    CreatePromptRequest:
+      type: object
+      properties:
+        prompt:
+          type: string
+          description: >-
+            The prompt text content with variable placeholders.
+        variables:
+          type: object
+          additionalProperties:
+            type: string
+          description: >-
+            Dictionary of variable names to their default values.
+      additionalProperties: false
+      required:
+        - prompt
+      title: CreatePromptRequest
+    Prompt:
+      type: object
+      properties:
+        prompt:
+          type: string
+          description: >-
+            The system prompt text with variable placeholders. Variables are only
+            supported when using the Responses API.
+        version:
+          type: string
+          description: >-
+            Version string (an integer starting at 1, cast to a string and incremented on save)
+        prompt_id:
+          type: string
+          description: >-
+            Unique identifier formatted as 'pmpt_<48-digit-hash>'
+        variables:
+          type: object
+          additionalProperties:
+            type: string
+          description: >-
+            Dictionary of prompt variable names and values
+      additionalProperties: false
+      required:
+        - version
+        - prompt_id
+      title: Prompt
+      description: >-
+        A prompt resource representing a stored OpenAI Compatible prompt template
+        in Llama Stack.
    OpenAIDeleteResponseObject:
      type: object
      properties:
@ -7607,6 +7854,7 @@ components:
            - benchmark
            - tool
            - tool_group
+            - prompt
          const: benchmark
          default: benchmark
          description: The resource type, always benchmark
@ -8093,6 +8341,7 @@ components:
            - benchmark
            - tool
            - tool_group
+            - prompt
          const: dataset
          default: dataset
          description: >-
@ -8205,6 +8454,7 @@ components:
            - benchmark
            - tool
            - tool_group
+            - prompt
          const: model
          default: model
          description: >-
@ -8396,6 +8646,7 @@ components:
            - benchmark
            - tool
            - tool_group
+            - prompt
          const: scoring_function
          default: scoring_function
          description: >-
@ -8472,6 +8723,7 @@ components:
            - benchmark
            - tool
            - tool_group
+            - prompt
          const: shield
          default: shield
          description: The resource type, always shield
@ -8651,6 +8903,7 @@ components:
            - benchmark
            - tool
            - tool_group
+            - prompt
          const: tool
          default: tool
          description: Type of resource, always 'tool'
@ -8709,6 +8962,7 @@ components:
            - benchmark
            - tool
            - tool_group
+            - prompt
          const: tool_group
          default: tool_group
          description: Type of resource, always 'tool_group'
@ -8937,6 +9191,7 @@ components:
            - benchmark
            - tool
            - tool_group
+            - prompt
          const: vector_db
          default: vector_db
          description: >-
@ -9563,6 +9818,18 @@ components:
      title: OpenAIResponseObjectWithInput
      description: >-
        OpenAI response object extended with input context information.
+    ListPromptsResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/Prompt'
+      additionalProperties: false
+      required:
+        - data
+      title: ListPromptsResponse
+      description: Response model to list prompts.
    ListProvidersResponse:
      type: object
      properties:
@ -12708,6 +12975,16 @@ components:
      title: ScoreBatchResponse
      description: >-
        Response from batch scoring operations on datasets.
+    SetDefaultVersionRequest:
+      type: object
+      properties:
+        version:
+          type: string
+          description: The version to set as default.
+      additionalProperties: false
+      required:
+        - version
+      title: SetDefaultVersionRequest
    AlgorithmConfig:
      oneOf:
        - $ref: '#/components/schemas/LoraFinetuningConfig'
@ -12904,6 +13181,22 @@ components:
      description: >-
        Response from the synthetic data generation. Batch of (prompt, response, score)
        tuples that pass the threshold.
+    UpdatePromptRequest:
+      type: object
+      properties:
+        prompt:
+          type: string
+          description: The updated prompt text content.
+        variables:
+          type: object
+          additionalProperties:
+            type: string
+          description: >-
+            Updated dictionary of variable names to their default values.
+      additionalProperties: false
+      required:
+        - prompt
+      title: UpdatePromptRequest
    VersionInfo:
      type: object
      properties:
@ -13015,6 +13308,9 @@ tags:
  - name: Inspect
  - name: Models
  - name: PostTraining (Coming Soon)
+  - name: Prompts
+    x-displayName: >-
+      Protocol for prompt management operations.
  - name: Providers
    x-displayName: >-
      Providers API for inspecting, listing, and modifying providers and their configurations.
@ -13042,6 +13338,7 @@ x-tagGroups:
      - Inspect
      - Models
      - PostTraining (Coming Soon)
+      - Prompts
      - Providers
      - Safety
      - Scoring
--- a/docs/source/contributing/testing/record-replay.md
+++ b/docs/source/contributing/testing/record-replay.md
@ -40,18 +40,15 @@ The system patches OpenAI and Ollama client methods to intercept calls before th

 ### Storage Architecture

-Recordings use a two-tier storage system optimized for both speed and debuggability:
+Recordings are stored as JSON files in the recording directory. They are looked up by their request hash.

 ```
 recordings/
-├── index.sqlite          # Fast lookup by request hash
 └── responses/
    ├── abc123def456.json  # Individual response files
    └── def789ghi012.json
 ```

-**SQLite index** enables O(log n) hash lookups and metadata queries without loading response bodies.
-
 **JSON files** store complete request/response pairs in human-readable format for debugging.

 ## Recording Modes
@ -166,8 +163,8 @@ This preserves type safety - when replayed, you get the same Pydantic objects wi
 Control recording behavior globally:

 ```bash
-export LLAMA_STACK_TEST_INFERENCE_MODE=replay
-export LLAMA_STACK_TEST_RECORDING_DIR=/path/to/recordings
+export LLAMA_STACK_TEST_INFERENCE_MODE=replay   # this is the default
+export LLAMA_STACK_TEST_RECORDING_DIR=/path/to/recordings   # default is tests/integration/recordings
 pytest tests/integration/
 ```

--- a/docs/source/distributions/self_hosted_distro/nvidia.md
+++ b/docs/source/distributions/self_hosted_distro/nvidia.md
@ -50,6 +50,7 @@ The following models are available by default:
 - `meta/llama-3.2-11b-vision-instruct `
 - `meta/llama-3.2-90b-vision-instruct `
 - `meta/llama-3.3-70b-instruct `
+- `nvidia/vila `
 - `nvidia/llama-3.2-nv-embedqa-1b-v2 `
 - `nvidia/nv-embedqa-e5-v5 `
 - `nvidia/nv-embedqa-mistral-7b-v2 `
--- a/docs/source/providers/vector_io/remote_pgvector.md
+++ b/docs/source/providers/vector_io/remote_pgvector.md
@ -12,6 +12,60 @@ That means you'll get fast and efficient vector retrieval.
 - Easy to use
 - Fully integrated with Llama Stack

+There are three search implementations available for PGVectorIndex:
+
+1. Vector Search:
+- How it works:
+  - Uses PostgreSQL's vector extension (pgvector) to perform similarity search
+  - Compares query embeddings against stored embeddings using Cosine distance or other distance metrics
+  - Eg. SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance
+
+- Characteristics:
+  - Semantic understanding - finds documents similar in meaning even if they don't share keywords
+  - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
+  - Best for: Finding conceptually related content, handling synonyms, cross-language search
+
+2. Keyword Search
+- How it works:
+  - Uses PostgreSQL's full-text search capabilities with tsvector and ts_rank
+  - Converts text to searchable tokens using to_tsvector('english', text). Default language is English.
+  - Eg. SQL query: SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score
+
+- Characteristics:
+  - Lexical matching - finds exact keyword matches and variations
+  - Uses GIN (Generalized Inverted Index) for fast text search performance
+  - Scoring: Uses PostgreSQL's ts_rank function for relevance scoring
+  - Best for: Exact term matching, proper names, technical terms, Boolean-style queries
+
+3. Hybrid Search
+- How it works:
+  - Combines both vector and keyword search results
+  - Runs both searches independently, then merges results using configurable reranking
+
+- Two reranking strategies available:
+    - Reciprocal Rank Fusion (RRF) - (default: 60.0)
+    - Weighted Average - (default: 0.5)
+
+- Characteristics:
+  - Best of both worlds: semantic understanding + exact matching
+  - Documents appearing in both searches get boosted scores
+  - Configurable balance between semantic and lexical matching
+  - Best for: General-purpose search where you want both precision and recall
+
+4. Database Schema
+The PGVector implementation stores data optimized for all three search types:
+CREATE TABLE vector_store_xxx (
+    id TEXT PRIMARY KEY,
+    document JSONB,                    -- Original document
+    embedding vector(dimension),        -- For vector search
+    content_text TEXT,                 -- Raw text content
+    tokenized_content TSVECTOR          -- For keyword search
+);
+
+-- Indexes for performance
+CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content);  -- Keyword search
+-- Vector index created automatically by pgvector
+
 ## Usage

 To use PGVector in your Llama Stack project, follow these steps:
@ -20,6 +74,25 @@ To use PGVector in your Llama Stack project, follow these steps:
 2. Configure your Llama Stack project to use pgvector. (e.g. remote::pgvector).
 3. Start storing and querying vectors.

+## Example: setting up your environment for PGVector
+
+1. Export env vars:
+```bash
+export ENABLE_PGVECTOR=true
+export PGVECTOR_HOST=localhost
+export PGVECTOR_PORT=5432
+export PGVECTOR_DB=llamastack
+export PGVECTOR_USER=llamastack
+export PGVECTOR_PASSWORD=llamastack
+```
+
+2. Create DB:
+```bash
+psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';"
+psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
+psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
+```
+
 ## Installation

 You can install PGVector using docker:
--- a/docs/source/providers/vector_io/remote_weaviate.md
+++ b/docs/source/providers/vector_io/remote_weaviate.md
@ -17,6 +17,7 @@ Weaviate supports:
 - Metadata filtering
 - Multi-modal retrieval

+
 ## Usage

 To use Weaviate in your Llama Stack project, follow these steps:
--- a/docs/source/references/llama_stack_client_cli_reference.md
+++ b/docs/source/references/llama_stack_client_cli_reference.md
@ -478,7 +478,6 @@ llama-stack-client scoring_functions list
 ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓
 ┃ identifier                                 ┃ provider_id  ┃ description                                                   ┃ type             ┃
 ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩
-│ basic::bfcl                                │ basic        │ BFCL complex scoring                                          │ scoring_function │
 │ basic::docvqa                              │ basic        │ DocVQA Visual Question & Answer scoring function              │ scoring_function │
 │ basic::equality                            │ basic        │ Returns 1.0 if the input is equal to the target, 0.0          │ scoring_function │
 │                                            │              │ otherwise.                                                    │                  │
--- a/llama_stack/apis/datatypes.py
+++ b/llama_stack/apis/datatypes.py
@ -102,6 +102,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
    :cvar benchmarks: Benchmark suite management
    :cvar tool_groups: Tool group organization
    :cvar files: File storage and management
+    :cvar prompts: Prompt versions and management
    :cvar inspect: Built-in system inspection and introspection
    """

@ -127,6 +128,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
    benchmarks = "benchmarks"
    tool_groups = "tool_groups"
    files = "files"
+    prompts = "prompts"

    # built-in API
    inspect = "inspect"
--- a/llama_stack/apis/files/files.py
+++ b/llama_stack/apis/files/files.py
@ -5,10 +5,10 @@
 # the root directory of this source tree.

 from enum import StrEnum
-from typing import Annotated, Literal, Protocol, runtime_checkable
+from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable

 from fastapi import File, Form, Response, UploadFile
-from pydantic import BaseModel
+from pydantic import BaseModel, Field

 from llama_stack.apis.common.responses import Order
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
@ -49,6 +49,23 @@ class OpenAIFileObject(BaseModel):
    purpose: OpenAIFilePurpose


+@json_schema_type
+class ExpiresAfter(BaseModel):
+    """
+    Control expiration of uploaded files.
+
+    Params:
+     - anchor, must be "created_at"
+     - seconds, must be int between 3600 and 2592000 (1 hour to 30 days)
+    """
+
+    # Inclusive bounds for `seconds`; the Field constraint below is built from
+    # these so the published limits and the enforced limits cannot diverge.
+    MIN: ClassVar[int] = 3600  # 1 hour
+    MAX: ClassVar[int] = 2592000  # 30 days
+
+    anchor: Literal["created_at"]
+    seconds: int = Field(..., ge=MIN, le=MAX)
+
+
@json_schema_type
 class ListOpenAIFileResponse(BaseModel):
    """
@ -92,6 +109,9 @@ class Files(Protocol):
        self,
        file: Annotated[UploadFile, File()],
        purpose: Annotated[OpenAIFilePurpose, Form()],
+        expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
+        expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
+        # TODO: expires_after is producing strange openapi spec, params are showing up as a required w/ oneOf being null
    ) -> OpenAIFileObject:
        """
        Upload a file that can be used across various endpoints.
@ -99,6 +119,7 @@ class Files(Protocol):
        The file upload should be a multipart form request with:
        - file: The File object (not file name) to be uploaded.
        - purpose: The intended purpose of the uploaded file.
+        - expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = <int>. Seconds must be between 3600 and 2592000 (1 hour to 30 days).

        :param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
        :param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
--- a/llama_stack/apis/prompts/init.py
+++ b/llama_stack/apis/prompts/init.py
@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .prompts import CreatePromptRequest, ListPromptsResponse, Prompt, Prompts, UpdatePromptRequest
+
+__all__ = ["Prompt", "Prompts", "ListPromptsResponse", "CreatePromptRequest", "UpdatePromptRequest"]
--- a/llama_stack/apis/prompts/prompts.py
+++ b/llama_stack/apis/prompts/prompts.py
@ -0,0 +1,173 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import secrets
+from typing import Protocol, runtime_checkable
+
+from pydantic import BaseModel, Field, field_validator
+
+from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.schema_utils import json_schema_type, webmethod
+
+
+@json_schema_type
+class Prompt(BaseModel):
+    """A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack.
+
+    :param prompt: The system prompt text with variable placeholders. Variables are only supported when using the Responses API.
+    :param version: Version string (integer start at 1 cast as string, incremented on save)
+    :param prompt_id: Unique identifier formatted as 'pmpt_<48-digit-hash>'
+    :param variables: Dictionary of prompt variable names and values
+    """
+
+    prompt: str | None = Field(default=None, description="The system prompt with variable placeholders")
+    version: str = Field(description="Version string (integer start at 1 cast as string)")
+    prompt_id: str = Field(description="Unique identifier in format 'pmpt_<48-digit-hash>'")
+    variables: dict[str, str] | None = Field(
+        default_factory=dict, description="Variables for dynamic injection using {{variable}} syntax"
+    )
+
+    @field_validator("prompt_id")
+    @classmethod
+    def validate_prompt_id(cls, prompt_id: str) -> str:
+        if not isinstance(prompt_id, str):
+            raise TypeError("prompt_id must be a string in format 'pmpt_<48-digit-hash>'")
+
+        if not prompt_id.startswith("pmpt_"):
+            raise ValueError("prompt_id must start with 'pmpt_' prefix")
+
+        hex_part = prompt_id[5:]
+        if len(hex_part) != 48:
+            raise ValueError("prompt_id must be in format 'pmpt_<48-digit-hash>' (48 lowercase hex chars)")
+
+        for char in hex_part:
+            if char not in "0123456789abcdef":
+                raise ValueError("prompt_id hex part must contain only lowercase hex characters [0-9a-f]")
+
+        return prompt_id
+
+    @field_validator("version")
+    @classmethod
+    def validate_version(cls, prompt_version: str) -> str:
+        """Check that the version is the string form of an integer >= 1.
+
+        :param prompt_version: The version string to validate.
+        :returns: The version string, unchanged.
+        :raises ValueError: If the string cannot be parsed as an integer, or the
+            parsed integer is smaller than 1.
+        """
+        # Parse and range-check separately so the two failures are told apart by
+        # control flow rather than by inspecting the text of int()'s error message.
+        try:
+            int_version = int(prompt_version)
+        except ValueError as e:
+            raise ValueError("version must be a string representation of an integer") from e
+        if int_version < 1:
+            raise ValueError("version must be >= 1")
+        return prompt_version
+
+    @classmethod
+    def generate_prompt_id(cls) -> str:
+        # Generate 48 hex characters (24 bytes)
+        random_bytes = secrets.token_bytes(24)
+        hex_string = random_bytes.hex()
+        return f"pmpt_{hex_string}"
+
+
+class CreatePromptRequest(BaseModel):
+    """Request model to create a prompt."""
+
+    prompt: str = Field(description="The prompt text content")
+    variables: dict[str, str] = Field(default_factory=dict, description="Variables for dynamic injection")
+
+
+class UpdatePromptRequest(BaseModel):
+    """Request model for updating a prompt."""
+
+    prompt: str = Field(description="The prompt text content")
+    variables: dict[str, str] = Field(default_factory=dict, description="Variables for dynamic injection")
+
+
+class ListPromptsResponse(BaseModel):
+    """Response model to list prompts."""
+
+    data: list[Prompt]
+
+
+@runtime_checkable
+@trace_protocol
+class Prompts(Protocol):
+    """Protocol for prompt management operations."""
+
+    @webmethod(route="/prompts", method="GET")
+    async def list_prompts(self) -> ListPromptsResponse:
+        """List all prompts.
+
+        :returns: A ListPromptsResponse containing all prompts.
+        """
+        ...
+
+    @webmethod(route="/prompts/{prompt_id:path}", method="GET")
+    async def get_prompt(
+        self,
+        prompt_id: str,
+        version: str | None = None,
+    ) -> Prompt:
+        """Get a prompt by its identifier and optional version.
+
+        :param prompt_id: The identifier of the prompt to get.
+        :param version: The version of the prompt to get (defaults to latest).
+        :returns: A Prompt resource.
+        """
+        ...
+
+    @webmethod(route="/prompts", method="POST")
+    async def create_prompt(
+        self,
+        prompt: str,
+        variables: dict[str, str] | None = None,
+    ) -> Prompt:
+        """Create a new prompt.
+
+        :param prompt: The prompt text content with variable placeholders.
+        :param variables: Dictionary of variable names to their default values.
+        :returns: The created Prompt resource.
+        """
+        ...
+
+    @webmethod(route="/prompts/{prompt_id:path}", method="PUT")
+    async def update_prompt(
+        self,
+        prompt_id: str,
+        prompt: str,
+        variables: dict[str, str] | None = None,
+    ) -> Prompt:
+        """Update an existing prompt (increments version).
+
+        :param prompt_id: The identifier of the prompt to update.
+        :param prompt: The updated prompt text content.
+        :param variables: Updated dictionary of variable names to their default values.
+        :returns: The updated Prompt resource with incremented version.
+        """
+        ...
+
+    @webmethod(route="/prompts/{prompt_id:path}", method="DELETE")
+    async def delete_prompt(
+        self,
+        prompt_id: str,
+    ) -> None:
+        """Delete a prompt.
+
+        :param prompt_id: The identifier of the prompt to delete.
+        """
+        ...
+
+    @webmethod(route="/prompts/{prompt_id:path}/default-version", method="PUT")
+    async def set_default_version(
+        self,
+        prompt_id: str,
+        version: str,
+    ) -> Prompt:
+        """Set which version of a prompt should be the default in get_prompt (latest).
+
+        :param prompt_id: The identifier of the prompt.
+        :param version: The version to set as default.
+        :returns: The prompt with the specified version now set as default.
+        """
+        ...
--- a/llama_stack/apis/resource.py
+++ b/llama_stack/apis/resource.py
@ -19,6 +19,7 @@ class ResourceType(StrEnum):
    benchmark = "benchmark"
    tool = "tool"
    tool_group = "tool_group"
+    prompt = "prompt"


 class Resource(BaseModel):
--- a/llama_stack/providers/inline/scoring/basic/utils/bfcl/init.py
+++ b/llama_stack/providers/inline/scoring/basic/utils/bfcl/init.py
--- a/llama_stack/core/prompts/prompts.py
+++ b/llama_stack/core/prompts/prompts.py
@ -0,0 +1,182 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+from typing import Any
+
+from pydantic import BaseModel
+
+from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
+from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
+from llama_stack.providers.utils.kvstore.config import KVStoreConfig
+
+
+class PromptServiceConfig(BaseModel):
+    """Configuration for the built-in prompt service.
+
+    :param kvstore: Configuration for the key-value store backend
+    """
+
+    kvstore: KVStoreConfig
+
+
+async def get_provider_impl(config: PromptServiceConfig, deps: dict[Any, Any]):
+    """Get the prompt service implementation."""
+    impl = PromptServiceImpl(config, deps)
+    await impl.initialize()
+    return impl
+
+
+class PromptServiceImpl(Prompts):
+    """Built-in prompt service implementation using KVStore."""
+
+    def __init__(self, config: PromptServiceConfig, deps: dict[Any, Any]):
+        self.config = config
+        self.deps = deps
+        self.kvstore: KVStore
+
+    async def initialize(self) -> None:
+        self.kvstore = await kvstore_impl(self.config.kvstore)
+
+    def _get_prompt_key(self, prompt_id: str, version: str | None = None) -> str:
+        if version:
+            return f"prompts:v1:{prompt_id}:{version}"
+        return f"prompts:v1:{prompt_id}:default"
+
+    def _get_version_key(self, prompt_id: str, version: str) -> str:
+        """Get the KVStore key for a specific prompt version."""
+        return f"prompts:v1:{prompt_id}:{version}"
+
+    def _get_list_key_prefix(self) -> str:
+        """Get the key prefix for listing prompts."""
+        return "prompts:v1:"
+
+    def _serialize_prompt(self, prompt: Prompt) -> str:
+        """Serialize a prompt to JSON string for storage."""
+        return json.dumps(
+            {
+                "prompt_id": prompt.prompt_id,
+                "prompt": prompt.prompt,
+                "version": prompt.version,
+                "variables": prompt.variables or {},
+            }
+        )
+
+    def _deserialize_prompt(self, data: str) -> Prompt:
+        """Deserialize a prompt from JSON string."""
+        obj = json.loads(data)
+        return Prompt(
+            prompt_id=obj["prompt_id"], prompt=obj["prompt"], version=obj["version"], variables=obj.get("variables", {})
+        )
+
+    async def list_prompts(self) -> ListPromptsResponse:
+        """List the default version of every stored prompt.
+
+        Scans all keys under the prompt key prefix, follows each ``:default``
+        pointer to the version record it names, and returns the resulting prompts
+        sorted by ``prompt_id`` in descending order. Entries whose pointer or
+        record is missing, or whose record cannot be decoded into a ``Prompt``,
+        are skipped rather than failing the whole listing.
+
+        :returns: A ListPromptsResponse containing the default version of each prompt.
+        """
+        prefix = self._get_list_key_prefix()
+        default_suffix = ":default"
+        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
+
+        prompts = []
+        for key in keys:
+            if not key.endswith(default_suffix):
+                continue
+            try:
+                default_version = await self.kvstore.get(key)
+                if not default_version:
+                    continue
+                # Strip only the leading prefix and the trailing suffix; str.replace
+                # would also remove matching text occurring inside the identifier.
+                prompt_id = key.removeprefix(prefix).removesuffix(default_suffix)
+                version_key = self._get_version_key(prompt_id, default_version)
+                data = await self.kvstore.get(version_key)
+                if data:
+                    prompts.append(self._deserialize_prompt(data))
+            except (ValueError, KeyError):
+                # ValueError covers json.JSONDecodeError and, in pydantic v2, the
+                # ValidationError raised when a stored record is not a valid Prompt.
+                continue
+
+        prompts.sort(key=lambda p: p.prompt_id or "", reverse=True)
+        return ListPromptsResponse(data=prompts)
+
+    async def get_prompt(self, prompt_id: str, version: str | None = None) -> Prompt:
+        """Get a prompt by its identifier and optional version."""
+        if version:
+            key = self._get_version_key(prompt_id, version)
+            data = await self.kvstore.get(key)
+            if data is None:
+                raise ValueError(f"Prompt {prompt_id} version {version} not found")
+        else:
+            default_key = self._get_prompt_key(prompt_id)
+            default_version = await self.kvstore.get(default_key)
+            if default_version is None:
+                raise ValueError(f"Prompt with ID '{prompt_id}' not found")
+
+            key = self._get_version_key(prompt_id, default_version)
+            data = await self.kvstore.get(key)
+            if data is None:
+                raise ValueError(f"Prompt with ID '{prompt_id}' not found")
+
+        return self._deserialize_prompt(data)
+
+    async def create_prompt(
+        self,
+        prompt: str,
+        variables: dict[str, str] | None = None,
+    ) -> Prompt:
+        """Create a new prompt."""
+        if variables is None:
+            variables = {}
+
+        prompt_obj = Prompt(prompt_id=Prompt.generate_prompt_id(), prompt=prompt, version="1", variables=variables)
+
+        version_key = self._get_version_key(prompt_obj.prompt_id, "1")
+        data = self._serialize_prompt(prompt_obj)
+        await self.kvstore.set(version_key, data)
+
+        default_key = self._get_prompt_key(prompt_obj.prompt_id)
+        await self.kvstore.set(default_key, "1")
+
+        return prompt_obj
+
+    async def update_prompt(
+        self,
+        prompt_id: str,
+        prompt: str,
+        variables: dict[str, str] | None = None,
+    ) -> Prompt:
+        """Store a new version of an existing prompt and make it the default.
+
+        The new version number is one greater than the highest version stored for
+        this prompt, not one greater than the current default. The default may have
+        been pointed at an older version via ``set_default_version``; deriving the
+        number from it would overwrite a later version that already exists.
+
+        :param prompt_id: The identifier of the prompt to update.
+        :param prompt: The updated prompt text content.
+        :param variables: Updated dictionary of variable names to their default values.
+        :returns: The updated Prompt resource with the new version.
+        :raises ValueError: If the prompt does not exist (raised by ``get_prompt``).
+        """
+        if variables is None:
+            variables = {}
+
+        # Existence check; also provides a floor for the version scan below.
+        current_prompt = await self.get_prompt(prompt_id)
+        latest_version = int(current_prompt.version)
+
+        # Version records live at "<prefix><version>"; the ":default" pointer shares
+        # the prefix but has a non-numeric tail and is ignored by the isdecimal test.
+        prefix = f"prompts:v1:{prompt_id}:"
+        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
+        for key in keys:
+            tail = key[len(prefix) :]
+            if tail.isdecimal():
+                latest_version = max(latest_version, int(tail))
+
+        new_version = str(latest_version + 1)
+
+        updated_prompt = Prompt(prompt_id=prompt_id, prompt=prompt, version=new_version, variables=variables)
+
+        version_key = self._get_version_key(prompt_id, new_version)
+        data = self._serialize_prompt(updated_prompt)
+        await self.kvstore.set(version_key, data)
+
+        default_key = self._get_prompt_key(prompt_id)
+        await self.kvstore.set(default_key, new_version)
+
+        return updated_prompt
+
+    async def delete_prompt(self, prompt_id: str) -> None:
+        """Delete a prompt and all its versions."""
+        await self.get_prompt(prompt_id)
+
+        prefix = f"prompts:v1:{prompt_id}:"
+        keys = await self.kvstore.keys_in_range(prefix, prefix + "\xff")
+
+        for key in keys:
+            await self.kvstore.delete(key)
+
+    async def set_default_version(self, prompt_id: str, version: str) -> Prompt:
+        """Set which version of a prompt should be the default (latest)."""
+        version_key = self._get_version_key(prompt_id, version)
+        data = await self.kvstore.get(version_key)
+        if data is None:
+            raise ValueError(f"Prompt {prompt_id} version {version} not found")
+
+        default_key = self._get_prompt_key(prompt_id)
+        await self.kvstore.set(default_key, version)
+
+        return self._deserialize_prompt(data)
--- a/llama_stack/core/resolver.py
+++ b/llama_stack/core/resolver.py
@ -19,6 +19,7 @@ from llama_stack.apis.inference import Inference, InferenceProvider
 from llama_stack.apis.inspect import Inspect
 from llama_stack.apis.models import Models
 from llama_stack.apis.post_training import PostTraining
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.providers import Providers as ProvidersAPI
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.scoring import Scoring
@ -93,6 +94,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
        Api.tool_groups: ToolGroups,
        Api.tool_runtime: ToolRuntime,
        Api.files: Files,
+        Api.prompts: Prompts,
    }

    if external_apis:
@ -284,7 +286,15 @@ async def instantiate_providers(
        if provider.provider_id is None:
            continue

+        try:
            deps = {a: impls[a] for a in provider.spec.api_dependencies}
+        except KeyError as e:
+            missing_api = e.args[0]
+            raise RuntimeError(
+                f"Failed to resolve '{provider.spec.api.value}' provider '{provider.provider_id}' of type '{provider.spec.provider_type}': "
+                f"required dependency '{missing_api.value}' is not available. "
+                f"Please add a '{missing_api.value}' provider to your configuration or check if the provider is properly configured."
+            ) from e
        for a in provider.spec.optional_api_dependencies:
            if a in impls:
                deps[a] = impls[a]
--- a/llama_stack/core/routers/inference.py
+++ b/llama_stack/core/routers/inference.py
@ -755,7 +755,7 @@ class InferenceRouter(Inference):
                            choices_data[idx] = {
                                "content_parts": [],
                                "tool_calls_builder": {},
-                                "finish_reason": None,
+                                "finish_reason": "stop",
                                "logprobs_content_parts": [],
                            }
                        current_choice_data = choices_data[idx]
--- a/llama_stack/core/server/server.py
+++ b/llama_stack/core/server/server.py
@ -132,9 +132,9 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
            },
        )
    elif isinstance(exc, ConflictError):
-        return HTTPException(status_code=409, detail=str(exc))
+        return HTTPException(status_code=httpx.codes.CONFLICT, detail=str(exc))
    elif isinstance(exc, ResourceNotFoundError):
-        return HTTPException(status_code=404, detail=str(exc))
+        return HTTPException(status_code=httpx.codes.NOT_FOUND, detail=str(exc))
    elif isinstance(exc, ValueError):
        return HTTPException(status_code=httpx.codes.BAD_REQUEST, detail=f"Invalid value: {str(exc)}")
    elif isinstance(exc, BadRequestError):
@ -513,6 +513,7 @@ def main(args: argparse.Namespace | None = None):

    apis_to_serve.add("inspect")
    apis_to_serve.add("providers")
+    apis_to_serve.add("prompts")
    for api_str in apis_to_serve:
        api = Api(api_str)

--- a/llama_stack/core/stack.py
+++ b/llama_stack/core/stack.py
@ -24,6 +24,7 @@ from llama_stack.apis.inference import Inference
 from llama_stack.apis.inspect import Inspect
 from llama_stack.apis.models import Models
 from llama_stack.apis.post_training import PostTraining
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.providers import Providers
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.scoring import Scoring
@ -37,6 +38,7 @@ from llama_stack.apis.vector_io import VectorIO
 from llama_stack.core.datatypes import Provider, StackRunConfig
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
+from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
 from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
 from llama_stack.core.resolver import ProviderRegistry, resolve_impls
 from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
@ -44,6 +46,7 @@ from llama_stack.core.store.registry import create_dist_registry
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
+from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig

 logger = get_logger(name=__name__, category="core")

@ -72,6 +75,7 @@ class LlamaStack(
    ToolRuntime,
    RAGToolRuntime,
    Files,
+    Prompts,
 ):
    pass

@ -105,12 +109,12 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):

        method = getattr(impls[api], register_method)
        for obj in objects:
-            logger.debug(f"registering {rsrc.capitalize()} {obj} for provider {obj.provider_id}")
-
+            if hasattr(obj, "provider_id"):
                # Do not register models on disabled providers
-            if hasattr(obj, "provider_id") and (not obj.provider_id or obj.provider_id == "__disabled__"):
+                if not obj.provider_id or obj.provider_id == "__disabled__":
                    logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled provider.")
                    continue
+                logger.debug(f"registering {rsrc.capitalize()} {obj} for provider {obj.provider_id}")

            # we want to maintain the type information in arguments to method.
            # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
@ -305,6 +309,12 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
    )
    impls[Api.providers] = providers_impl

+    prompts_impl = PromptServiceImpl(
+        PromptServiceConfig(kvstore=SqliteKVStoreConfig(db_path=os.path.expanduser("~/.llama-stack/prompts.db"))),
+        deps=impls,
+    )
+    impls[Api.prompts] = prompts_impl
+

 # Produces a stack of providers for the given run config. Not all APIs may be
 # asked for in the run config.
@ -329,6 +339,9 @@ async def construct_stack(
    # Add internal implementations after all other providers are resolved
    add_internal_implementations(impls, run_config)

+    if Api.prompts in impls:
+        await impls[Api.prompts].initialize()
+
    await register_resources(run_config, impls)

    await refresh_registry_once(impls)
--- a/llama_stack/distributions/nvidia/run.yaml
+++ b/llama_stack/distributions/nvidia/run.yaml
@ -134,6 +134,11 @@ models:
  provider_id: nvidia
  provider_model_id: meta/llama-3.3-70b-instruct
  model_type: llm
+- metadata: {}
+  model_id: nvidia/vila
+  provider_id: nvidia
+  provider_model_id: nvidia/vila
+  model_type: llm
 - metadata:
    embedding_dimension: 2048
    context_length: 8192
--- a/llama_stack/distributions/open-benchmark/open_benchmark.py
+++ b/llama_stack/distributions/open-benchmark/open_benchmark.py
@ -43,7 +43,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
            "openai",
            [
                ProviderModelEntry(
-                    provider_model_id="openai/gpt-4o",
+                    provider_model_id="gpt-4o",
                    model_type=ModelType.llm,
                )
            ],
@ -53,7 +53,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
            "anthropic",
            [
                ProviderModelEntry(
-                    provider_model_id="anthropic/claude-3-5-sonnet-latest",
+                    provider_model_id="claude-3-5-sonnet-latest",
                    model_type=ModelType.llm,
                )
            ],
@ -206,13 +206,6 @@ def get_distribution_template() -> DistributionTemplate:
                uri="huggingface://datasets/llamastack/math_500?split=test",
            ),
        ),
-        DatasetInput(
-            dataset_id="bfcl",
-            purpose=DatasetPurpose.eval_messages_answer,
-            source=URIDataSource(
-                uri="huggingface://datasets/llamastack/bfcl_v3?split=train",
-            ),
-        ),
        DatasetInput(
            dataset_id="ifeval",
            purpose=DatasetPurpose.eval_messages_answer,
@ -250,11 +243,6 @@ def get_distribution_template() -> DistributionTemplate:
            dataset_id="math_500",
            scoring_functions=["basic::regex_parser_math_response"],
        ),
-        BenchmarkInput(
-            benchmark_id="meta-reference-bfcl",
-            dataset_id="bfcl",
-            scoring_functions=["basic::bfcl"],
-        ),
        BenchmarkInput(
            benchmark_id="meta-reference-ifeval",
            dataset_id="ifeval",
--- a/llama_stack/distributions/open-benchmark/run.yaml
+++ b/llama_stack/distributions/open-benchmark/run.yaml
@ -136,14 +136,14 @@ inference_store:
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db
 models:
 - metadata: {}
-  model_id: openai/gpt-4o
+  model_id: gpt-4o
  provider_id: openai
-  provider_model_id: openai/gpt-4o
+  provider_model_id: gpt-4o
  model_type: llm
 - metadata: {}
-  model_id: anthropic/claude-3-5-sonnet-latest
+  model_id: claude-3-5-sonnet-latest
  provider_id: anthropic
-  provider_model_id: anthropic/claude-3-5-sonnet-latest
+  provider_model_id: claude-3-5-sonnet-latest
  model_type: llm
 - metadata: {}
  model_id: gemini/gemini-1.5-flash
@ -188,12 +188,6 @@ datasets:
    uri: huggingface://datasets/llamastack/math_500?split=test
  metadata: {}
  dataset_id: math_500
- purpose: eval/messages-answer
-  source:
-    type: uri
-    uri: huggingface://datasets/llamastack/bfcl_v3?split=train
-  metadata: {}
-  dataset_id: bfcl
 - purpose: eval/messages-answer
  source:
    type: uri
@ -228,11 +222,6 @@ benchmarks:
  - basic::regex_parser_math_response
  metadata: {}
  benchmark_id: meta-reference-math-500
- dataset_id: bfcl
-  scoring_functions:
-  - basic::bfcl
-  metadata: {}
-  benchmark_id: meta-reference-bfcl
 - dataset_id: ifeval
  scoring_functions:
  - basic::ifeval
--- a/llama_stack/providers/inline/files/localfs/files.py
+++ b/llama_stack/providers/inline/files/localfs/files.py
@ -86,11 +86,16 @@ class LocalfsFilesImpl(Files):
        self,
        file: Annotated[UploadFile, File()],
        purpose: Annotated[OpenAIFilePurpose, Form()],
+        expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
+        expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
    ) -> OpenAIFileObject:
        """Upload a file that can be used across various endpoints."""
        if not self.sql_store:
            raise RuntimeError("Files provider not initialized")

+        if expires_after_anchor is not None or expires_after_seconds is not None:
+            raise NotImplementedError("File expiration is not supported by this provider")
+
        file_id = self._generate_file_id()
        file_path = self._get_file_path(file_id)

--- a/llama_stack/providers/inline/scoring/basic/scoring.py
+++ b/llama_stack/providers/inline/scoring/basic/scoring.py
@ -22,7 +22,6 @@ from llama_stack.providers.utils.common.data_schema_validator import (
 )

 from .config import BasicScoringConfig
-from .scoring_fn.bfcl_scoring_fn import BFCLScoringFn
 from .scoring_fn.docvqa_scoring_fn import DocVQAScoringFn
 from .scoring_fn.equality_scoring_fn import EqualityScoringFn
 from .scoring_fn.ifeval_scoring_fn import IfEvalScoringFn
@ -37,7 +36,6 @@ FIXED_FNS = [
    SubsetOfScoringFn,
    RegexParserScoringFn,
    RegexParserMathResponseScoringFn,
-    BFCLScoringFn,
    IfEvalScoringFn,
    DocVQAScoringFn,
 ]
--- a/llama_stack/providers/inline/scoring/basic/scoring_fn/bfcl_scoring_fn.py
+++ b/llama_stack/providers/inline/scoring/basic/scoring_fn/bfcl_scoring_fn.py
@ -1,93 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-import re
-from typing import Any
-
-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
-from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
-
-from ..utils.bfcl.ast_parser import decode_ast
-from ..utils.bfcl.checker import ast_checker, is_empty_output
-from .fn_defs.bfcl import bfcl
-
-
-def postprocess(x: dict[str, Any], test_category: str) -> dict[str, Any]:
-    contain_func_call = False
-    error = None
-    error_type = None
-    checker_result = {}
-    try:
-        prediction = decode_ast(x["generated_answer"], x["language"]) or ""
-        contain_func_call = True
-        # if not is_function_calling_format_output(prediction):
-        if is_empty_output(prediction):
-            contain_func_call = False
-            error = "Did not output in the specified format. Note: the model_result is wrapped in a string to ensure json serializability."
-            error_type = "ast_decoder:decoder_wrong_output_format"
-        else:
-            checker_result = ast_checker(
-                json.loads(x["function"]),
-                prediction,
-                json.loads(x["ground_truth"]),
-                x["language"],
-                test_category=test_category,
-                model_name="",
-            )
-    except Exception as e:
-        prediction = ""
-        error = f"Invalid syntax. Failed to decode AST. {str(e)}"
-        error_type = "ast_decoder:decoder_failed"
-    return {
-        "prediction": prediction,
-        "contain_func_call": contain_func_call,
-        "valid": checker_result.get("valid", False),
-        "error": error or checker_result.get("error", ""),
-        "error_type": error_type or checker_result.get("error_type", ""),
-    }
-
-
-def gen_valid(x: dict[str, Any]) -> dict[str, float]:
-    return {"valid": x["valid"]}
-
-
-def gen_relevance_acc(x: dict[str, Any]) -> dict[str, float]:
-    # This function serves for both relevance and irrelevance tests, which share the exact opposite logic.
-    # If `test_category` is "irrelevance", the model is expected to output no function call.
-    # No function call means either the AST decoding fails (a error message is generated) or the decoded AST does not contain any function call (such as a empty list, `[]`).
-    # If `test_category` is "relevance", the model is expected to output to a function call, and empty list doesn't count as a function call.
-    acc = not x["contain_func_call"] if "irrelevance" in x["id"] else x["contain_func_call"]
-    return {"valid": float(acc)}
-
-
-class BFCLScoringFn(RegisteredBaseScoringFn):
-    """
-    A scoring_fn for BFCL
-    """
-
-    def __init__(self, *args, **kwargs) -> None:
-        super().__init__(*args, **kwargs)
-        self.supported_fn_defs_registry = {
-            bfcl.identifier: bfcl,
-        }
-
-    async def score_row(
-        self,
-        input_row: dict[str, Any],
-        scoring_fn_identifier: str | None = "bfcl",
-        scoring_params: ScoringFnParams | None = None,
-    ) -> ScoringResultRow:
-        test_category = re.sub(r"_[0-9_-]+$", "", input_row["id"])
-        score_result = postprocess(input_row, test_category)
-        if test_category in {"irrelevance", "live_relevance", "live_irrelevance"}:
-            score = gen_relevance_acc(score_result)["valid"]
-        else:
-            score = gen_valid(score_result)["valid"]
-        return {
-            "score": float(score),
-        }
--- a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/bfcl.py
+++ b/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/bfcl.py
@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
-    AggregationFunctionType,
-    BasicScoringFnParams,
-    ScoringFn,
-)
-
-bfcl = ScoringFn(
-    identifier="basic::bfcl",
-    description="BFCL complex scoring",
-    return_type=NumberType(),
-    provider_id="basic",
-    provider_resource_id="bfcl",
-    params=BasicScoringFnParams(aggregation_functions=[AggregationFunctionType.accuracy]),
-)
--- a/llama_stack/providers/inline/scoring/basic/utils/bfcl/ast_parser.py
+++ b/llama_stack/providers/inline/scoring/basic/utils/bfcl/ast_parser.py
@ -1,296 +0,0 @@
-# ruff: noqa
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-import ast
-
-from .tree_sitter import get_parser
-
-
-def parse_java_function_call(source_code):
-    if not source_code.endswith(";"):
-        source_code += ";"  # Necessary for the parser not to register an error
-    parser = get_parser("java")
-    tree = parser.parse(bytes(source_code, "utf8"))
-    root_node = tree.root_node
-
-    if root_node.has_error:
-        raise Exception("Error parsing java the source code.")
-
-    def get_text(node):
-        """Returns the text represented by the node."""
-        return source_code[node.start_byte : node.end_byte]
-
-    def traverse_node(node, nested=False):
-        if node.type == "string_literal":
-            if nested:
-                return get_text(node)
-            # Strip surrounding quotes from string literals
-            return get_text(node)[1:-1]
-        elif node.type == "character_literal":
-            if nested:
-                return get_text(node)
-            # Strip surrounding single quotes from character literals
-            return get_text(node)[1:-1]
-        """Traverse the node to collect texts for complex structures."""
-        if node.type in [
-            "identifier",
-            "class_literal",
-            "type_identifier",
-            "method_invocation",
-        ]:
-            return get_text(node)
-        elif node.type == "array_creation_expression":
-            # Handle array creation expression specifically
-            type_node = node.child_by_field_name("type")
-            value_node = node.child_by_field_name("value")
-            type_text = traverse_node(type_node, True)
-            value_text = traverse_node(value_node, True)
-            return f"new {type_text}[]{value_text}"
-        elif node.type == "object_creation_expression":
-            # Handle object creation expression specifically
-            type_node = node.child_by_field_name("type")
-            arguments_node = node.child_by_field_name("arguments")
-            type_text = traverse_node(type_node, True)
-            if arguments_node:
-                # Process each argument carefully, avoiding unnecessary punctuation
-                argument_texts = []
-                for child in arguments_node.children:
-                    if child.type not in [
-                        ",",
-                        "(",
-                        ")",
-                    ]:  # Exclude commas and parentheses
-                        argument_text = traverse_node(child, True)
-                        argument_texts.append(argument_text)
-                arguments_text = ", ".join(argument_texts)
-                return f"new {type_text}({arguments_text})"
-            else:
-                return f"new {type_text}()"
-        elif node.type == "set":
-            # Handling sets specifically
-            items = [traverse_node(n, True) for n in node.children if n.type not in [",", "set"]]
-            return "{" + ", ".join(items) + "}"
-
-        elif node.child_count > 0:
-            return "".join(traverse_node(child, True) for child in node.children)
-        else:
-            return get_text(node)
-
-    def extract_arguments(args_node):
-        arguments = {}
-        for child in args_node.children:
-            if child.type == "assignment_expression":
-                # For named parameters
-                name_node, value_node = child.children[0], child.children[2]
-                name = get_text(name_node)
-                value = traverse_node(value_node)
-                if name in arguments:
-                    if not isinstance(arguments[name], list):
-                        arguments[name] = [arguments[name]]
-                    arguments[name].append(value)
-                else:
-                    arguments[name] = value
-                # arguments.append({'name': name, 'value': value})
-            elif child.type in ["identifier", "class_literal", "set"]:
-                # For unnamed parameters and handling sets
-                value = traverse_node(child)
-                if None in arguments:
-                    if not isinstance(arguments[None], list):
-                        arguments[None] = [arguments[None]]
-                    arguments[None].append(value)
-                else:
-                    arguments[None] = value
-        return arguments
-
-    def traverse(node):
-        if node.type == "method_invocation":
-            # Extract the function name and its arguments
-            method_name = get_text(node.child_by_field_name("name"))
-            class_name_node = node.child_by_field_name("object")
-            if class_name_node:
-                class_name = get_text(class_name_node)
-                function_name = f"{class_name}.{method_name}"
-            else:
-                function_name = method_name
-            arguments_node = node.child_by_field_name("arguments")
-            if arguments_node:
-                arguments = extract_arguments(arguments_node)
-                for key, value in arguments.items():
-                    if isinstance(value, list):
-                        raise Exception("Error: Multiple arguments with the same name are not supported.")
-                return [{function_name: arguments}]
-
-        else:
-            for child in node.children:
-                result = traverse(child)
-                if result:
-                    return result
-
-    result = traverse(root_node)
-    return result if result else {}
-
-
-def parse_javascript_function_call(source_code):
-    if not source_code.endswith(";"):
-        source_code += ";"  # Necessary for the parser not to register an error
-    parser = get_parser("javascript")
-    # Parse the source code
-    tree = parser.parse(bytes(source_code, "utf8"))
-    root_node = tree.root_node
-    if root_node.has_error:
-        raise Exception("Error js parsing the source code.")
-
-    # Function to recursively extract argument details
-    def extract_arguments(node):
-        args = {}
-        for child in node.children:
-            if child.type == "assignment_expression":
-                # Extract left (name) and right (value) parts of the assignment
-                name = child.children[0].text.decode("utf-8")
-                value = child.children[2].text.decode("utf-8")
-                if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
-                    value = value[1:-1]  # Trim the quotation marks
-                if name in args:
-                    if not isinstance(args[name], list):
-                        args[name] = [args[name]]
-                    args[name].append(value)
-                else:
-                    args[name] = value
-
-            elif child.type == "identifier" or child.type == "true":
-                # Handle non-named arguments and boolean values
-                value = child.text.decode("utf-8")
-                if None in args:
-                    if not isinstance(args[None], list):
-                        args[None] = [args[None]]
-                    args[None].append(value)
-                else:
-                    args[None] = value
-        return args
-
-    # Find the function call and extract its name and arguments
-    if root_node.type == "program":
-        for child in root_node.children:
-            if child.type == "expression_statement":
-                for sub_child in child.children:
-                    if sub_child.type == "call_expression":
-                        function_name = sub_child.children[0].text.decode("utf8")
-                        arguments_node = sub_child.children[1]
-                        parameters = extract_arguments(arguments_node)
-                        for key, value in parameters.items():
-                            if isinstance(value, list):
-                                raise Exception("Error: Multiple arguments with the same name are not supported.")
-                        result = [{function_name: parameters}]
-                        return result
-
-
-def ast_parse(input_str, language="Python"):
-    if language == "Python":
-        cleaned_input = input_str.strip("[]'")
-        parsed = ast.parse(cleaned_input, mode="eval")
-        extracted = []
-        if isinstance(parsed.body, ast.Call):
-            extracted.append(resolve_ast_call(parsed.body))
-        else:
-            for elem in parsed.body.elts:
-                extracted.append(resolve_ast_call(elem))
-        return extracted
-    elif language == "Java":
-        return parse_java_function_call(input_str[1:-1])  # Remove the [ and ] from the string
-    elif language == "JavaScript":
-        return parse_javascript_function_call(input_str[1:-1])
-    else:
-        raise NotImplementedError(f"Unsupported language: {language}")
-
-
-def resolve_ast_call(elem):
-    # Handle nested attributes for deeply nested module paths
-    func_parts = []
-    func_part = elem.func
-    while isinstance(func_part, ast.Attribute):
-        func_parts.append(func_part.attr)
-        func_part = func_part.value
-    if isinstance(func_part, ast.Name):
-        func_parts.append(func_part.id)
-    func_name = ".".join(reversed(func_parts))
-    args_dict = {}
-    # Parse when args are simply passed as an unnamed dictionary arg
-    for arg in elem.args:
-        if isinstance(arg, ast.Dict):
-            for key, value in zip(arg.keys, arg.values):
-                if isinstance(key, ast.Constant):
-                    arg_name = key.value
-                output = resolve_ast_by_type(value)
-                args_dict[arg_name] = output
-    for arg in elem.keywords:
-        output = resolve_ast_by_type(arg.value)
-        args_dict[arg.arg] = output
-    return {func_name: args_dict}
-
-
-def resolve_ast_by_type(value):
-    if isinstance(value, ast.Constant):
-        if value.value is Ellipsis:
-            output = "..."
-        else:
-            output = value.value
-    elif isinstance(value, ast.UnaryOp):
-        output = -value.operand.value
-    elif isinstance(value, ast.List):
-        output = [resolve_ast_by_type(v) for v in value.elts]
-    elif isinstance(value, ast.Dict):
-        output = {resolve_ast_by_type(k): resolve_ast_by_type(v) for k, v in zip(value.keys, value.values)}
-    elif isinstance(value, ast.NameConstant):  # Added this condition to handle boolean values
-        output = value.value
-    elif isinstance(value, ast.BinOp):  # Added this condition to handle function calls as arguments
-        output = eval(ast.unparse(value))
-    elif isinstance(value, ast.Name):
-        output = value.id
-    elif isinstance(value, ast.Call):
-        if len(value.keywords) == 0:
-            output = ast.unparse(value)
-        else:
-            output = resolve_ast_call(value)
-    elif isinstance(value, ast.Tuple):
-        output = tuple(resolve_ast_by_type(v) for v in value.elts)
-    elif isinstance(value, ast.Lambda):
-        output = eval(ast.unparse(value.body[0].value))
-    elif isinstance(value, ast.Ellipsis):
-        output = "..."
-    elif isinstance(value, ast.Subscript):
-        try:
-            output = ast.unparse(value.body[0].value)
-        except:
-            output = ast.unparse(value.value) + "[" + ast.unparse(value.slice) + "]"
-    else:
-        raise Exception(f"Unsupported AST type: {type(value)}")
-    return output
-
-
-def decode_ast(result, language="Python"):
-    func = result
-    func = func.replace("\n", "")  # remove new line characters
-    if not func.startswith("["):
-        func = "[" + func
-    if not func.endswith("]"):
-        func = func + "]"
-    decoded_output = ast_parse(func, language)
-    return decoded_output
-
-
-def decode_execute(result):
-    func = result
-    func = func.replace("\n", "")  # remove new line characters
-    if not func.startswith("["):
-        func = "[" + func
-    if not func.endswith("]"):
-        func = func + "]"
-    decode_output = ast_parse(func)
-    execution_list = []
-    for function_call in decode_output:
-        for key, value in function_call.items():
-            execution_list.append(f"{key}({','.join([f'{k}={repr(v)}' for k, v in value.items()])})")
-    return execution_list
--- a/llama_stack/providers/inline/scoring/basic/utils/bfcl/checker.py
+++ b/llama_stack/providers/inline/scoring/basic/utils/bfcl/checker.py
@ -1,989 +0,0 @@
-# ruff: noqa
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-import json
-import re
-import time
-from typing import Any
-
-# Comment out for now until we actually use the rest checker in evals
-# import requests  # Do not remove this import even though it seems to be unused. It's used in the executable_checker_rest function.
-
-
-class NoAPIKeyError(Exception):
-    def __init__(self):
-        self.message = "❗️Please fill in the API keys in the function_credential_config.json file. If you do not provide the API keys, the executable test category results will be inaccurate."
-        super().__init__(self.message)
-
-
-REAL_TIME_MATCH_ALLOWED_DIFFERENCE = 0.2
-
-
-JAVA_TYPE_CONVERSION = {
-    "byte": int,
-    "short": int,
-    "integer": int,
-    "float": float,
-    "double": float,
-    "long": int,
-    "boolean": bool,
-    "char": str,
-    "Array": list,
-    "ArrayList": list,
-    "Set": set,
-    "HashMap": dict,
-    "Hashtable": dict,
-    "Queue": list,  # this can be `queue.Queue` as well, for simplicity we check with list
-    "Stack": list,
-    "String": str,
-    "any": str,
-}
-
-JS_TYPE_CONVERSION = {
-    "String": str,
-    "integer": int,
-    "float": float,
-    "Bigint": int,
-    "Boolean": bool,
-    "dict": dict,
-    "array": list,
-    "any": str,
-}
-
-# We switch to conditional import for the following two imports to avoid unnecessary installations.
-# User doesn't need to setup the tree-sitter packages if they are not running the test for that language.
-# from js_type_converter import js_type_converter
-# from java_type_converter import java_type_converter
-
-PYTHON_TYPE_MAPPING = {
-    "string": str,
-    "integer": int,
-    "float": float,
-    "boolean": bool,
-    "array": list,
-    "tuple": list,
-    "dict": dict,
-    "any": str,
-}
-
-# This is the list of types that we need to recursively check its values
-PYTHON_NESTED_TYPE_CHECK_LIST = ["array", "tuple"]
-
-
-NESTED_CONVERSION_TYPE_LIST = ["Array", "ArrayList", "array"]
-
-
-#### Helper functions for AST ####
-def find_description(func_descriptions, name):
-    if type(func_descriptions) == list:
-        for func_description in func_descriptions:
-            if func_description["name"] == name:
-                return func_description
-        return None
-    else:
-        # it is a dict, there is only one function
-        return func_descriptions
-
-
-def get_possible_answer_type(possible_answer: list):
-    for answer in possible_answer:
-        if answer != "":  # Optional parameter
-            return type(answer)
-    return None
-
-
-def type_checker(
-    param: str,
-    value,
-    possible_answer: list,
-    expected_type_description: str,
-    expected_type_converted,
-    nested_type_converted,
-):
-    # NOTE: This type checker only supports nested type checking for one level deep.
-    # We didn't implement recursive type checking for nested types, as it's not needed for the current use case and it's very complex.
-
-    result: Any = {
-        "valid": True,
-        "error": [],
-        "is_variable": False,
-        "error_type": "type_error:simple",
-    }
-
-    is_variable = False
-    # check for the case where a variable is used instead of a actual value.
-    # use the type in possible_answer as the expected type
-    possible_answer_type = get_possible_answer_type(possible_answer)
-    # if possible_answer only contains optional parameters, we can't determine the type
-    if possible_answer_type != None:
-        # we are being precise here.
-        # in fact, possible_answer_type should always be string, as that's how we treat varibale in possible_answer
-        if possible_answer_type != expected_type_converted:
-            is_variable = True
-
-    # value is the same type as in function description
-    if type(value) == expected_type_converted:
-        # We don't need to do recursive check for simple types
-        if nested_type_converted == None:
-            result["is_variable"] = is_variable
-            return result
-        else:
-            for possible_answer_item in possible_answer:
-                flag = True  # Each parameter should match to at least one possible answer type.
-                # Here, we assume that each item should be the same type. We could also relax it.
-                if type(possible_answer_item) == list:
-                    for value_item in value:
-                        checker_result = type_checker(
-                            param,
-                            value_item,
-                            possible_answer_item,
-                            str(nested_type_converted),
-                            nested_type_converted,
-                            None,
-                        )
-                        if not checker_result["valid"]:
-                            flag = False
-                            break
-
-                if flag:
-                    return {"valid": True, "error": [], "is_variable": is_variable}
-
-            result["valid"] = False
-            result["error"] = [
-                f"Nested type checking failed for parameter {repr(param)}. Expected outer type {expected_type_description} with inner type {str(nested_type_converted)}. Parameter value: {repr(value)}."
-            ]
-            result["error_type"] = "type_error:nested"
-
-    # value is not as expected, check for the case where a variable is used instead of a actual value
-    # use the type in possible_answer as the expected type
-    possible_answer_type = get_possible_answer_type(possible_answer)
-    # if possible_answer only contains optional parameters, we can't determine the type
-    if possible_answer_type != None:
-        # we are being precise here.
-        # in fact, possible_answer_type should always be string, as that's how we treat varibale in possible_answer
-        if type(value) == possible_answer_type:
-            result["is_variable"] = True
-            return result
-
-    result["valid"] = False
-    result["error"].append(
-        f"Incorrect type for parameter {repr(param)}. Expected type {expected_type_description}, got {type(value).__name__}. Parameter value: {repr(value)}."
-    )
-    result["error_type"] = "type_error:simple"
-    return result
-
-
-def standardize_string(input_string: str):
-    # This function standardizes the string by removing all the spaces, ",./-_*^" punctuation, and converting it to lowercase
-    # It will also convert all the single quotes to double quotes
-    # This is used to compare the model output with the possible answers
-    # We don't want to punish model for answer like April 1, 2024 vs April 1,2024, vs April 1 2024
-    regex_string = r"[ \,\.\/\-\_\*\^]"
-    return re.sub(regex_string, "", input_string).lower().replace("'", '"')
-
-
-def string_checker(param: str, model_output: str, possible_answer: list):
-    standardize_possible_answer = []
-    standardize_model_output = standardize_string(model_output)
-    for i in range(len(possible_answer)):
-        if type(possible_answer[i]) == str:
-            standardize_possible_answer.append(standardize_string(possible_answer[i]))
-
-    if standardize_model_output not in standardize_possible_answer:
-        return {
-            "valid": False,
-            "error": [
-                f"Invalid value for parameter {repr(param)}: {repr(model_output)}. Expected one of {possible_answer}. Case insensitive."
-            ],
-            "error_type": "value_error:string",
-        }
-
-    return {"valid": True, "error": []}
-
-
-def list_checker(param: str, model_output: list, possible_answer: list):
-    # Convert the tuple to a list
-
-    standardize_model_output = list(model_output)
-
-    # If the element in the list is a string, we need to standardize it
-    for i in range(len(standardize_model_output)):
-        if type(standardize_model_output[i]) == str:
-            standardize_model_output[i] = standardize_string(model_output[i])
-
-    standardize_possible_answer: Any = []
-    # We also need to standardize the possible answers
-    for i in range(len(possible_answer)):
-        standardize_possible_answer.append([])
-        for j in range(len(possible_answer[i])):
-            if type(possible_answer[i][j]) == str:
-                standardize_possible_answer[i].append(standardize_string(possible_answer[i][j]))
-            else:
-                standardize_possible_answer[i].append(possible_answer[i][j])
-
-    if standardize_model_output not in standardize_possible_answer:
-        return {
-            "valid": False,
-            "error": [
-                f"Invalid value for parameter {repr(param)}: {repr(model_output)}. Expected one of {possible_answer}."
-            ],
-            "error_type": "value_error:list/tuple",
-        }
-
-    return {"valid": True, "error": []}
-
-
-def dict_checker(param: str, model_output: dict, possible_answers: list):
-    # This function works for simple dictionaries, but not dictionaries with nested dictionaries.
-    # The current dataset only contains simple dictionaries, so this is sufficient.
-
-    result = {"valid": False, "error": [], "error_type": "dict_checker:unclear"}
-    for i in range(len(possible_answers)):
-        if possible_answers[i] == "":
-            continue
-
-        result = {"valid": False, "error": [], "error_type": "dict_checker:unclear"}
-
-        flag = True
-
-        possible_answer = possible_answers[i]
-        # possible_anwer is a single dictionary
-
-        for key, value in model_output.items():
-            if key not in possible_answer:
-                result["valid"] = False
-                result["error"].append(f"Unexpected dict key parameter: '{key}'.")  # type: ignore[attr-defined]
-                result["error_type"] = "value_error:dict_key"
-                flag = False
-                break
-
-            standardize_value = value
-            # If the value is a string, we need to standardize it
-            if type(value) == str:
-                standardize_value = standardize_string(value)
-
-            # We also need to standardize the possible answers if they are string
-            standardize_possible_answer = []
-            for i in range(len(possible_answer[key])):
-                if type(possible_answer[key][i]) == str:
-                    standardize_possible_answer.append(standardize_string(possible_answer[key][i]))
-                else:
-                    standardize_possible_answer.append(possible_answer[key][i])
-
-            if standardize_value not in standardize_possible_answer:
-                result["valid"] = False
-                result["error"].append(  # type: ignore[attr-defined]
-                    f"Invalid value for parameter {repr(key)}: {repr(value)}. Expected one of {standardize_possible_answer}."
-                )
-                result["error_type"] = "value_error:dict_value"
-                flag = False
-                break
-
-        for key, value in possible_answer.items():
-            if key not in model_output and "" not in value:
-                result["valid"] = False
-                result["error"].append(f"Missing dict key parameter: '{key}'.")  # type: ignore[attr-defined]
-                result["error_type"] = "value_error:dict_key"
-                flag = False
-                break
-
-        if flag:
-            return {"valid": True, "error": []}
-
-    return result
-
-
-def list_dict_checker(param: str, model_output: list, possible_answers: list):
-    # This function takes in a list of dictionaries and checks if each dictionary is valid
-    # The order of the dictionaries in the list must match the order of the possible answers
-
-    result = {"valid": False, "error": [], "error_type": "list_dict_checker:unclear"}
-
-    for answer_index in range(len(possible_answers)):
-        flag = True  # True means so far, all dictionaries are valid
-
-        # Only proceed if the number of dictionaries in the list matches the number of dictionaries in the possible answers
-        if len(model_output) != len(possible_answers[answer_index]):
-            result["valid"] = False
-            result["error"] = ["Wrong number of dictionaries in the list."]
-            result["error_type"] = "value_error:list_dict_count"
-            flag = False
-            continue
-
-        for dict_index in range(len(model_output)):
-            result = dict_checker(
-                param,
-                model_output[dict_index],
-                [possible_answers[answer_index][dict_index]],
-            )
-            if not result["valid"]:
-                flag = False
-                break
-        if flag:
-            return {"valid": True, "error": []}
-
-    return result
-
-
-def simple_function_checker(
-    func_description: dict,
-    model_output: dict,
-    possible_answer: dict,
-    language: str,
-    model_name: str,
-):
-    possible_answer = list(possible_answer.values())[0]
-    # Extract function name and parameters details
-    func_name = func_description["name"]
-    param_details = func_description["parameters"]["properties"]
-    required_params = func_description["parameters"]["required"]
-
-    # Initialize a result dictionary
-    result = {
-        "valid": True,
-        "error": [],
-        "error_type": "simple_function_checker:unclear",
-    }
-
-    # Check if function name matches
-    if func_name not in model_output:
-        result["valid"] = False
-        result["error"].append(  # type: ignore[attr-defined]
-            f"Function name {repr(func_name)} not found in model output."
-        )
-        result["error_type"] = "simple_function_checker:wrong_func_name"
-        return result
-
-    model_params = model_output[func_name]
-
-    # Check for required parameters in model output
-    for param in required_params:
-        if param not in model_params:
-            result["valid"] = False
-            result["error"].append(f"Missing required parameter: {repr(param)}.")  # type: ignore[attr-defined]
-            result["error_type"] = "simple_function_checker:missing_required"
-            return result
-
-    # Validate types and values for each parameter in model output
-    for param, value in model_params.items():
-        if param not in param_details or param not in possible_answer:
-            result["valid"] = False
-            result["error"].append(f"Unexpected parameter: {repr(param)}.")  # type: ignore[attr-defined]
-            result["error_type"] = "simple_function_checker:unexpected_param"
-            return result
-
-        full_param_details = param_details[param]
-        expected_type_description = full_param_details["type"]  # This is a string
-        is_variable = False
-        nested_type_converted = None
-
-        if language == "Java":
-            from evals.utils.bfcl.java_type_converter import java_type_converter
-
-            expected_type_converted = JAVA_TYPE_CONVERSION[expected_type_description]
-
-            if expected_type_description in JAVA_TYPE_CONVERSION:
-                if type(value) != str:
-                    result["valid"] = False
-                    result["error"].append(  # type: ignore[attr-defined]
-                        f"Incorrect type for parameter {repr(param)}. Expected type String, got {type(value).__name__}. Parameter value: {repr(value)}."
-                    )
-                    result["error_type"] = "type_error:java"
-                    return result
-
-                if expected_type_description in NESTED_CONVERSION_TYPE_LIST:
-                    nested_type = param_details[param]["items"]["type"]
-                    nested_type_converted = JAVA_TYPE_CONVERSION[nested_type]
-                    value = java_type_converter(value, expected_type_description, nested_type)
-                else:
-                    value = java_type_converter(value, expected_type_description)
-
-        elif language == "JavaScript":
-            from evals.utils.bfcl.js_type_converter import js_type_converter
-
-            expected_type_converted = JS_TYPE_CONVERSION[expected_type_description]
-
-            if expected_type_description in JS_TYPE_CONVERSION:
-                if type(value) != str:
-                    result["valid"] = False
-                    result["error"].append(  # type: ignore[attr-defined]
-                        f"Incorrect type for parameter {repr(param)}. Expected type String, got {type(value).__name__}. Parameter value: {repr(value)}."
-                    )
-                    result["error_type"] = "type_error:js"
-                    return result
-
-                if expected_type_description in NESTED_CONVERSION_TYPE_LIST:
-                    nested_type = param_details[param]["items"]["type"]
-                    nested_type_converted = JS_TYPE_CONVERSION[nested_type]
-                    value = js_type_converter(value, expected_type_description, nested_type)
-                else:
-                    value = js_type_converter(value, expected_type_description)
-
-        elif language == "Python":
-            expected_type_converted = PYTHON_TYPE_MAPPING[expected_type_description]
-            if expected_type_description in PYTHON_NESTED_TYPE_CHECK_LIST:
-                nested_type = param_details[param]["items"]["type"]
-                nested_type_converted = PYTHON_TYPE_MAPPING[nested_type]
-
-        # We convert all tuple value to list when the expected type is tuple.
-        # The conversion is necessary because any tuple in the possible answer would become a list after being processed through json.dump() and json.load().
-        # This does introduce some false positive (eg, when the model provides a list value instead of tuple). We hope to find a better solution in the future.
-        if expected_type_description == "tuple" and type(value) == tuple:
-            value = list(value)
-
-        # Allow python auto conversion from int to float
-        if language == "Python" and expected_type_description == "float" and type(value) == int:
-            value = float(value)
-
-        # Type checking
-        # In fact, we only check for Python here.
-        # Type check for other languages are handled by the type converter, and so their value (after conversion) is always correct.
-        type_check_result = type_checker(
-            param,
-            value,
-            possible_answer[param],
-            expected_type_description,
-            expected_type_converted,
-            nested_type_converted,
-        )
-        is_variable = type_check_result["is_variable"]
-        if not type_check_result["valid"]:
-            return type_check_result
-
-        # It doesn't make sense to special handle dictionaries and list of dictionaries if the value is a variable.
-        # We can just treat the variable as a string and use the normal flow.
-        if not is_variable:
-            # Special handle for dictionaries
-            if expected_type_converted == dict:
-                result = dict_checker(param, value, possible_answer[param])
-                if not result["valid"]:
-                    return result
-                continue
-
-            # Special handle for list of dictionaries
-            elif expected_type_converted == list and nested_type_converted == dict:
-                result = list_dict_checker(param, value, possible_answer[param])
-                if not result["valid"]:
-                    return result
-                continue
-
-            # Special handle for strings
-            elif expected_type_converted == str:
-                # We don't check for case sensitivity for string, as long as it's not a variable
-                result = string_checker(param, value, possible_answer[param])
-                if not result["valid"]:
-                    return result
-                continue
-
-            elif expected_type_converted == list:
-                result = list_checker(param, value, possible_answer[param])
-                if not result["valid"]:
-                    return result
-                continue
-
-        # Check if the value is within the possible answers
-        if value not in possible_answer[param]:
-            result["valid"] = False
-            result["error"].append(  # type: ignore[attr-defined]
-                f"Invalid value for parameter {repr(param)}: {repr(value)}. Expected one of {possible_answer[param]}."
-            )
-            result["error_type"] = "value_error:others"
-            return result
-
-    # Check for optional parameters not provided but allowed
-    for param in possible_answer:
-        if param not in model_params and "" not in possible_answer[param]:
-            result["valid"] = False
-            result["error"].append(  # type: ignore[attr-defined]
-                f"Optional parameter {repr(param)} not provided and not marked as optional."
-            )
-            result["error_type"] = "simple_function_checker:missing_optional"
-            return result
-
-    return result
-
-
-def parallel_function_checker_enforce_order(
-    func_descriptions: list,
-    model_output: list,
-    possible_answers: dict,
-    language: str,
-    model_name: str,
-):
-    if len(model_output) != len(possible_answers):
-        return {
-            "valid": False,
-            "error": ["Wrong number of functions."],
-            "error_type": "parallel_function_checker_enforce_order:wrong_count",
-        }
-
-    func_name_list = list(possible_answers.keys())
-    possible_answers_list = []
-
-    for key, value in possible_answers.items():
-        possible_answers_list.append({key: value})
-
-    for i in range(len(possible_answers_list)):
-        func_description = find_description(func_descriptions, func_name_list[i])
-
-        result = simple_function_checker(
-            func_description,
-            model_output[i],
-            possible_answers_list[i],
-            language,
-            model_name,
-        )
-        if not result["valid"]:
-            return result
-
-    return {"valid": True, "error": []}
-
-
-def parallel_function_checker_no_order(
-    func_descriptions: list,
-    model_output: list,
-    possible_answers: list,
-    language: str,
-    model_name: str,
-):
-    if len(model_output) != len(possible_answers):
-        return {
-            "valid": False,
-            "error": ["Wrong number of functions."],
-            "error_type": "parallel_function_checker_no_order:wrong_count",
-        }
-
-    matched_indices = []
-
-    # We go throught the possible answers one by one, and eliminate the model output that matches the possible answer
-    # It must be this way because we need ground truth to fetch the correct function description
-    for i in range(len(possible_answers)):
-        # possible_answers[i] is a dictionary with only one key
-        func_name_expected = list(possible_answers[i].keys())[0]
-        func_description = find_description(func_descriptions, func_name_expected)
-
-        all_errors = []
-
-        for index in range(len(model_output)):
-            if index in matched_indices:
-                continue
-
-            result = simple_function_checker(
-                func_description,
-                model_output[index],
-                possible_answers[i],
-                language,
-                model_name,
-            )
-
-            if result["valid"]:
-                matched_indices.append(index)
-                break
-            else:
-                all_errors.append(
-                    {
-                        f"Model Result Index {index}": {
-                            "sub_error": result["error"],
-                            "sub_error_type": result["error_type"],
-                            "model_output_item": model_output[index],
-                            "possible_answer_item": possible_answers[i],
-                        }
-                    }
-                )
-
-        if not result["valid"]:
-            considered_indices = [i for i in range(len(model_output)) if i not in matched_indices]
-            all_errors.insert(
-                0,
-                f"Could not find a matching function among index {considered_indices} of model output for index {i} of possible answers.",  # type: ignore[arg-type]
-            )
-            return {
-                "valid": False,
-                "error": all_errors,
-                "error_type": "parallel_function_checker_no_order:cannot_find_match",
-            }
-
-    return {"valid": True, "error": []}
-
-
-def multiple_function_checker(
-    func_descriptions: list,
-    model_output: list,
-    possible_answers: list,
-    language: str,
-    model_name: str,
-):
-    if len(model_output) != len(possible_answers):
-        return {
-            "valid": False,
-            "error": ["Wrong number of functions."],
-            "error_type": "multiple_function_checker:wrong_count",
-        }
-
-    # possible_answers is a list of only one dictionary with only one key
-    func_name_expected = list(possible_answers[0].keys())[0]
-    func_description = find_description(func_descriptions, func_name_expected)
-    return simple_function_checker(
-        func_description,
-        model_output[0],
-        possible_answers[0],
-        language,
-        model_name,
-    )
-
-
-def patten_matcher(exec_output, expected_result, function_call, is_sanity_check):
-    result = {"valid": True, "error": [], "error_type": "executable_checker:unclear"}
-
-    if type(exec_output) != type(expected_result):
-        return {
-            "valid": False,
-            "error": [
-                f"Wrong execution result type for {repr(function_call)}. Expected type: {type(expected_result)}, but got: {type(exec_output)}."
-            ],
-            "error_type": "executable_checker:wrong_result_type",
-            "model_executed_output": exec_output,
-        }
-    if type(exec_output) == dict:
-        # We loose the requirement for the sanity check as the expected result used in the sanity check might not be the most up-to-date one.
-        # This happens when the key is a timestamp or a random number.
-        if is_sanity_check:
-            if len(exec_output) != len(expected_result):
-                return {
-                    "valid": False,
-                    "error": [
-                        f"Wrong execution result pattern for {repr(function_call)}. Expect type Dict, but wrong number of elements in the output. Expected length: {len(expected_result)}, but got: {len(exec_output)}."
-                    ],
-                    "error_type": "executable_checker:wrong_result_type:dict_length",
-                    "model_executed_output": exec_output,
-                }
-            else:
-                return result
-
-        for key, value in expected_result.items():
-            if key not in exec_output:
-                return {
-                    "valid": False,
-                    "error": [
-                        f"Wrong execution result pattern for {repr(function_call)}. Expect type Dict, but key {repr(key)} not found in the model output."
-                    ],
-                    "error_type": "executable_checker:wrong_result_type:dict_key_not_found",
-                    "model_executed_output": exec_output,
-                }
-        for key, value in exec_output.items():
-            if key not in expected_result:
-                return {
-                    "valid": False,
-                    "error": [
-                        f"Wrong execution result pattern for {repr(function_call)}. Expect type Dict, but key {repr(key)} not expected in the model output."
-                    ],
-                    "error_type": "executable_checker:wrong_result_type:dict_extra_key",
-                    "model_executed_output": exec_output,
-                }
-    if type(exec_output) == list:
-        if len(exec_output) != len(expected_result):
-            return {
-                "valid": False,
-                "error": [
-                    f"Wrong execution result pattern for {repr(function_call)}. Expect type list, but wrong number of elements in the output. Expected length: {len(expected_result)}, but got: {len(exec_output)}."
-                ],
-                "error_type": "executable_checker:wrong_result_type:list_length",
-                "model_executed_output": exec_output,
-            }
-    return result
-
-
-#### Helper functions for Exec ####
-def executable_checker_simple(
-    function_call: str,
-    expected_result,
-    expected_result_type: str,
-    is_sanity_check=False,
-):
-    result = {"valid": True, "error": [], "error_type": "executable_checker:unclear"}
-
-    exec_dict: Any = {}
-
-    try:
-        exec(
-            "from executable_python_function import *" + "\nresult=" + function_call,
-            exec_dict,
-        )
-        exec_output = exec_dict["result"]
-    except NoAPIKeyError as e:
-        raise e
-    except Exception as e:
-        result["valid"] = False
-        result["error"].append(  # type: ignore[attr-defined]
-            f"Error in execution: {repr(function_call)}. Error: {str(e)}"
-        )
-        result["error_type"] = "executable_checker:execution_error"
-        return result
-
-    # We need to special handle the case where the execution result is a tuple and convert it to a list
-    # Because when json is stored, the tuple is converted to a list, and so the expected result is a list when loaded from json
-    if isinstance(exec_output, tuple):
-        exec_output = list(exec_output)
-
-    if expected_result_type == "exact_match":
-        if exec_output != expected_result:
-            result["valid"] = False
-            result["error"].append(  # type: ignore[attr-defined]
-                f"Wrong execution result for {repr(function_call)}. Expected: {expected_result}, but got: {exec_output}."
-            )
-            result["error_type"] = "executable_checker:wrong_result"
-            result["model_executed_output"] = exec_output
-            return result
-
-    elif expected_result_type == "real_time_match":
-        # Allow for 5% difference
-        if (type(expected_result) == float or type(expected_result) == int) and (
-            type(exec_output) == float or type(exec_output) == int
-        ):
-            if not (
-                expected_result * (1 - REAL_TIME_MATCH_ALLOWED_DIFFERENCE)
-                <= exec_output
-                <= expected_result * (1 + REAL_TIME_MATCH_ALLOWED_DIFFERENCE)
-            ):
-                result["valid"] = False
-                result["error"].append(  # type: ignore[attr-defined]
-                    f"Wrong execution result for {repr(function_call)}. Expected: {expected_result}, but got: {exec_output}. {REAL_TIME_MATCH_ALLOWED_DIFFERENCE * 100}% difference allowed."
-                )
-                result["error_type"] = "executable_checker:wrong_result_real_time"
-                result["model_executed_output"] = exec_output
-                return result
-        else:
-            result["valid"] = False
-            result["error"].append(  # type: ignore[attr-defined]
-                f"Wrong execution result for {repr(function_call)}. Expected: {expected_result}, but got: {exec_output}. Type needs to be float or int for real time match criteria."
-            )
-            result["error_type"] = "executable_checker:wrong_result_real_time"
-            result["model_executed_output"] = exec_output
-            return result
-
-    else:
-        # structural match
-        pattern_match_result = patten_matcher(exec_output, expected_result, function_call, is_sanity_check)
-        if not pattern_match_result["valid"]:
-            return pattern_match_result
-
-    return result
-
-
-def executable_checker_parallel_no_order(
-    decoded_result: list, expected_exec_result: list, expected_exec_result_type: list
-):
-    if len(decoded_result) != len(expected_exec_result):
-        return {
-            "valid": False,
-            "error": [
-                f"Wrong number of functions provided. Expected {len(expected_exec_result)}, but got {len(decoded_result)}."
-            ],
-            "error_type": "value_error:exec_result_count",
-        }
-
-    matched_indices = []
-    for i in range(len(expected_exec_result)):
-        all_errors = []
-        for index in range(len(decoded_result)):
-            if index in matched_indices:
-                continue
-
-            result = executable_checker_simple(
-                decoded_result[index],
-                expected_exec_result[i],
-                expected_exec_result_type[i],
-                False,
-            )
-
-            if result["valid"]:
-                matched_indices.append(index)
-                break
-            else:
-                all_errors.append(
-                    {
-                        f"Model Result Index {index}": {
-                            "sub_error": result["error"],
-                            "sub_error_type": result["error_type"],
-                            "model_executed_output": (
-                                result["model_executed_output"] if "model_executed_output" in result else None
-                            ),
-                        }
-                    }
-                )
-
-        if not result["valid"]:
-            considered_indices = [i for i in range(len(decoded_result)) if i not in matched_indices]
-            all_errors.insert(
-                0,
-                f"Could not find a matching function among index {considered_indices} of model output for index {i} of possible answers.",  # type: ignore[arg-type]
-            )
-            return {
-                "valid": False,
-                "error": all_errors,
-                "error_type": "executable_checker:cannot_find_match",
-            }
-
-    return {"valid": True, "error": [], "error_type": "executable_checker:unclear"}
-
-
-#### Main function ####
-def executable_checker_rest(func_call, idx):
-    # Move this here for now to avoid needing to read this file / fix paths to be relative to dataset_dir. Fix when it's actually needed / used.
-    EVAL_GROUND_TRUTH_PATH = "/mnt/wsfuse/fair_llm_v2/datasets/eval/bfcl/rest-eval-response_v5.jsonl"  # Ground truth file for v5 for rest execution
-    with open(EVAL_GROUND_TRUTH_PATH, "r") as f:
-        EVAL_GROUND_TRUTH = f.readlines()
-    if "https://geocode.maps.co" in func_call:
-        time.sleep(2)
-    if "requests_get" in func_call:
-        func_call = func_call.replace("requests_get", "requests.get")
-    try:
-        response = eval(func_call)
-    except Exception as e:
-        return {
-            "valid": False,
-            "error": [f"Execution failed. {str(e)}"],
-            "error_type": "executable_checker_rest:execution_error",
-        }
-
-    try:
-        if response.status_code == 200:
-            eval_GT_json = json.loads(EVAL_GROUND_TRUTH[idx])
-            try:
-                if isinstance(eval_GT_json, dict):
-                    if isinstance(response.json(), dict):
-                        if set(eval_GT_json.keys()) == set(response.json().keys()):
-                            return {"valid": True, "error": [], "error_type": ""}
-                        return {
-                            "valid": False,
-                            "error": ["Key inconsistency"],
-                            "error_type": "executable_checker_rest:wrong_key",
-                        }
-                    return {
-                        "valid": False,
-                        "error": [f"Expected dictionary, but got {type(response.json())}"],
-                        "error_type": "executable_checker_rest:wrong_type",
-                    }
-
-                elif isinstance(eval_GT_json, list):
-                    if isinstance(response.json(), list):
-                        if len(eval_GT_json) != len(response.json()):
-                            return {
-                                "valid": False,
-                                "error": [f"Response list length inconsistency."],
-                                "error_type": "value_error:exec_result_rest_count",
-                            }
-
-                        else:
-                            for i in range(len(eval_GT_json)):
-                                if set(eval_GT_json[i].keys()) != set(response.json()[i].keys()):
-                                    return {
-                                        "valid": False,
-                                        "error": [f"Key inconsistency"],
-                                        "error_type": "executable_checker_rest:wrong_key",
-                                    }
-
-                            return {"valid": True, "error": []}
-                    else:
-                        return {
-                            "valid": False,
-                            "error": [f"Expected list, but got {type(response.json())}"],
-                            "error_type": "executable_checker_rest:wrong_type",
-                        }
-                return {
-                    "valid": False,
-                    "error": [f"Expected dict or list, but got {type(response.json())}"],
-                    "error_type": "executable_checker_rest:wrong_type",
-                }
-            except Exception as e:
-                return {
-                    "valid": False,
-                    "error": [
-                        f"Error in execution and type checking. Status code: {response.status_code}. Error: {str(e)}"
-                    ],
-                    "error_type": "executable_checker_rest:response_format_error",
-                }
-        else:
-            return {
-                "valid": False,
-                "error": [f"Execution result status code is not 200, got {response.status_code}"],
-                "error_type": "executable_checker_rest:wrong_status_code",
-            }
-    except Exception as e:
-        return {
-            "valid": False,
-            "error": [f"Cannot get status code of the response. Error: {str(e)}"],
-            "error_type": "executable_checker_rest:cannot_get_status_code",
-        }
-
-
-def ast_checker(func_description, model_output, possible_answer, language, test_category, model_name):
-    if "parallel" in test_category:
-        return parallel_function_checker_no_order(func_description, model_output, possible_answer, language, model_name)
-
-    elif "multiple" in test_category:
-        return multiple_function_checker(func_description, model_output, possible_answer, language, model_name)
-
-    else:
-        if len(model_output) != 1:
-            return {
-                "valid": False,
-                "error": ["Wrong number of functions."],
-                "error_type": "simple_function_checker:wrong_count",
-            }
-
-        return simple_function_checker(
-            func_description[0],
-            model_output[0],
-            possible_answer[0],
-            language,
-            model_name,
-        )
-
-
-def exec_checker(decoded_result: list, func_description: dict, test_category: str):
-    if "multiple" in test_category or "parallel" in test_category:
-        return executable_checker_parallel_no_order(
-            decoded_result,
-            func_description["execution_result"],
-            func_description["execution_result_type"],
-        )
-
-    else:
-        if len(decoded_result) != 1:
-            return {
-                "valid": False,
-                "error": ["Wrong number of functions."],
-                "error_type": "simple_exec_checker:wrong_count",
-            }
-        return executable_checker_simple(
-            decoded_result[0],
-            func_description["execution_result"][0],
-            func_description["execution_result_type"][0],
-            False,
-        )
-
-
-def is_empty_output(decoded_output):
-    # This function is a patch to the ast decoder for relevance detection
-    # Sometimes the ast decoder will parse successfully, but the input doens't really have a function call
-    # [], [{}], and anything that is not in function calling format is considered empty (and thus should be marked as correct)
-    if not is_function_calling_format_output(decoded_output):
-        return True
-    if len(decoded_output) == 0:
-        return True
-    if len(decoded_output) == 1 and len(decoded_output[0]) == 0:
-        return True
-
-
-def is_function_calling_format_output(decoded_output):
-    # Ensure the output is a list of dictionaries
-    if type(decoded_output) == list:
-        for item in decoded_output:
-            if type(item) != dict:
-                return False
-        return True
-    return False
--- a/llama_stack/providers/inline/scoring/basic/utils/bfcl/tree_sitter.py
+++ b/llama_stack/providers/inline/scoring/basic/utils/bfcl/tree_sitter.py
@ -1,40 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-"""
-Tree-sitter changes its API with unfortunate frequency. Modules that need it should
-import it from here so that we can centrally manage things as necessary.
-"""
-
-# These currently work with tree-sitter 0.23.0
-# NOTE: Don't import tree-sitter or any of the language modules in the main module
-# because not all environments have them. Import lazily inside functions where needed.
-
-import importlib
-import typing
-
-if typing.TYPE_CHECKING:
-    import tree_sitter
-
-
-def get_language(language: str) -> "tree_sitter.Language":
-    import tree_sitter
-
-    language_module_name = f"tree_sitter_{language}"
-    try:
-        language_module = importlib.import_module(language_module_name)
-    except ModuleNotFoundError as exc:
-        raise ValueError(
-            f"Language {language} is not found. Please install the tree-sitter-{language} package."
-        ) from exc
-    return tree_sitter.Language(language_module.language())
-
-
-def get_parser(language: str, **kwargs) -> "tree_sitter.Parser":
-    import tree_sitter
-
-    lang = get_language(language)
-    return tree_sitter.Parser(lang, **kwargs)
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@ -30,11 +30,11 @@ from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import (
    RERANKER_TYPE_RRF,
-    RERANKER_TYPE_WEIGHTED,
    ChunkForDeletion,
    EmbeddingIndex,
    VectorDBWithIndex,
 )
+from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator

 logger = get_logger(name=__name__, category="vector_io")

@ -66,59 +66,6 @@ def _create_sqlite_connection(db_path):
    return connection


-def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
-    """Normalize scores to [0,1] range using min-max normalization."""
-    if not scores:
-        return {}
-    min_score = min(scores.values())
-    max_score = max(scores.values())
-    score_range = max_score - min_score
-    if score_range > 0:
-        return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()}
-    return dict.fromkeys(scores, 1.0)
-
-
-def _weighted_rerank(
-    vector_scores: dict[str, float],
-    keyword_scores: dict[str, float],
-    alpha: float = 0.5,
-) -> dict[str, float]:
-    """ReRanker that uses weighted average of scores."""
-    all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
-    normalized_vector_scores = _normalize_scores(vector_scores)
-    normalized_keyword_scores = _normalize_scores(keyword_scores)
-
-    return {
-        doc_id: (alpha * normalized_keyword_scores.get(doc_id, 0.0))
-        + ((1 - alpha) * normalized_vector_scores.get(doc_id, 0.0))
-        for doc_id in all_ids
-    }
-
-
-def _rrf_rerank(
-    vector_scores: dict[str, float],
-    keyword_scores: dict[str, float],
-    impact_factor: float = 60.0,
-) -> dict[str, float]:
-    """ReRanker that uses Reciprocal Rank Fusion."""
-    # Convert scores to ranks
-    vector_ranks = {
-        doc_id: i + 1 for i, (doc_id, _) in enumerate(sorted(vector_scores.items(), key=lambda x: x[1], reverse=True))
-    }
-    keyword_ranks = {
-        doc_id: i + 1 for i, (doc_id, _) in enumerate(sorted(keyword_scores.items(), key=lambda x: x[1], reverse=True))
-    }
-
-    all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
-    rrf_scores = {}
-    for doc_id in all_ids:
-        vector_rank = vector_ranks.get(doc_id, float("inf"))
-        keyword_rank = keyword_ranks.get(doc_id, float("inf"))
-        # RRF formula: score = 1/(k + r) where k is impact_factor and r is the rank
-        rrf_scores[doc_id] = (1.0 / (impact_factor + vector_rank)) + (1.0 / (impact_factor + keyword_rank))
-    return rrf_scores
-
-
 def _make_sql_identifier(name: str) -> str:
    return re.sub(r"[^a-zA-Z0-9_]", "_", name)

@ -398,14 +345,10 @@ class SQLiteVecIndex(EmbeddingIndex):
            for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
        }

-        # Combine scores using the specified reranker
-        if reranker_type == RERANKER_TYPE_WEIGHTED:
-            alpha = reranker_params.get("alpha", 0.5)
-            combined_scores = _weighted_rerank(vector_scores, keyword_scores, alpha)
-        else:
-            # Default to RRF for None, RRF, or any unknown types
-            impact_factor = reranker_params.get("impact_factor", 60.0)
-            combined_scores = _rrf_rerank(vector_scores, keyword_scores, impact_factor)
+        # Combine scores using the reranking utility
+        combined_scores = WeightedInMemoryAggregator.combine_search_results(
+            vector_scores, keyword_scores, reranker_type, reranker_params
+        )

        # Sort by combined score and get top k results
        sorted_items = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)
--- a/llama_stack/providers/registry/inference.py
+++ b/llama_stack/providers/registry/inference.py
@ -292,7 +292,7 @@ Available Models:
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="watsonx",
-                pip_packages=["ibm_watson_machine_learning"],
+                pip_packages=["ibm_watsonx_ai"],
                module="llama_stack.providers.remote.inference.watsonx",
                config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
                provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",
--- a/llama_stack/providers/registry/vector_io.py
+++ b/llama_stack/providers/registry/vector_io.py
@ -404,6 +404,60 @@ That means you'll get fast and efficient vector retrieval.
 - Easy to use
 - Fully integrated with Llama Stack

+There are three implementations of search available for PGVectorIndex:
+
+1. Vector Search:
+- How it works:
+  - Uses PostgreSQL's vector extension (pgvector) to perform similarity search
+  - Compares query embeddings against stored embeddings using Cosine distance or other distance metrics
+  - E.g., SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance
+
+- Characteristics:
+  - Semantic understanding - finds documents similar in meaning even if they don't share keywords
+  - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
+  - Best for: Finding conceptually related content, handling synonyms, cross-language search
+
+2. Keyword Search
+- How it works:
+  - Uses PostgreSQL's full-text search capabilities with tsvector and ts_rank
+  - Converts text to searchable tokens using to_tsvector('english', text). Default language is English.
+  - E.g., SQL query: SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score
+
+- Characteristics:
+  - Lexical matching - finds exact keyword matches and variations
+  - Uses GIN (Generalized Inverted Index) for fast text search performance
+  - Scoring: Uses PostgreSQL's ts_rank function for relevance scoring
+  - Best for: Exact term matching, proper names, technical terms, Boolean-style queries
+
+3. Hybrid Search
+- How it works:
+  - Combines both vector and keyword search results
+  - Runs both searches independently, then merges results using configurable reranking
+
+- Two reranking strategies available:
+    - Reciprocal Rank Fusion (RRF) - (default `impact_factor`: 60.0)
+    - Weighted Average - (default `alpha`: 0.5)
+
+- Characteristics:
+  - Best of both worlds: semantic understanding + exact matching
+  - Documents appearing in both searches get boosted scores
+  - Configurable balance between semantic and lexical matching
+  - Best for: General-purpose search where you want both precision and recall
+
+4. Database Schema
+The PGVector implementation stores data optimized for all three search types:
+CREATE TABLE vector_store_xxx (
+    id TEXT PRIMARY KEY,
+    document JSONB,                    -- Original document
+    embedding vector(dimension),        -- For vector search
+    content_text TEXT,                 -- Raw text content
+    tokenized_content TSVECTOR          -- For keyword search
+);
+
+-- Indexes for performance
+CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content);  -- Keyword search
+-- Vector index created automatically by pgvector
+
 ## Usage

 To use PGVector in your Llama Stack project, follow these steps:
@ -412,6 +466,25 @@ To use PGVector in your Llama Stack project, follow these steps:
 2. Configure your Llama Stack project to use pgvector. (e.g. remote::pgvector).
 3. Start storing and querying vectors.

+## This is an example of how you can set up your environment to use PGVector
+
+1. Export env vars:
+```bash
+export ENABLE_PGVECTOR=true
+export PGVECTOR_HOST=localhost
+export PGVECTOR_PORT=5432
+export PGVECTOR_DB=llamastack
+export PGVECTOR_USER=llamastack
+export PGVECTOR_PASSWORD=llamastack
+```
+
+2. Create DB:
+```bash
+psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';"
+psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
+psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
+```
+
 ## Installation

 You can install PGVector using docker:
@ -449,6 +522,7 @@ Weaviate supports:
 - Metadata filtering
 - Multi-modal retrieval

+
 ## Usage

 To use Weaviate in your Llama Stack project, follow these steps:
--- a/llama_stack/providers/remote/files/s3/init.py
+++ b/llama_stack/providers/remote/files/s3/init.py
@ -6,15 +6,14 @@

 from typing import Any

-from llama_stack.core.datatypes import Api
+from llama_stack.core.datatypes import AccessRule, Api

 from .config import S3FilesImplConfig


-async def get_adapter_impl(config: S3FilesImplConfig, deps: dict[Api, Any]):
+async def get_adapter_impl(config: S3FilesImplConfig, deps: dict[Api, Any], policy: list[AccessRule] | None = None):
    from .files import S3FilesImpl

-    # TODO: authorization policies and user separation
-    impl = S3FilesImpl(config)
+    impl = S3FilesImpl(config, policy or [])
    await impl.initialize()
    return impl
--- a/llama_stack/providers/remote/files/s3/files.py
+++ b/llama_stack/providers/remote/files/s3/files.py
@ -4,9 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import time
 import uuid
-from typing import Annotated
+from datetime import UTC, datetime
+from typing import Annotated, Any

 import boto3
 from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError
@ -15,14 +15,17 @@ from fastapi import File, Form, Response, UploadFile
 from llama_stack.apis.common.errors import ResourceNotFoundError
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.files import (
+    ExpiresAfter,
    Files,
    ListOpenAIFileResponse,
    OpenAIFileDeleteResponse,
    OpenAIFileObject,
    OpenAIFilePurpose,
 )
+from llama_stack.core.datatypes import AccessRule
 from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.sqlstore import SqlStore, sqlstore_impl
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl

 from .config import S3FilesImplConfig

@ -83,22 +86,85 @@ async def _create_bucket_if_not_exists(client: boto3.client, config: S3FilesImpl
            raise RuntimeError(f"Failed to access S3 bucket '{config.bucket_name}': {e}") from e


+def _make_file_object(
+    *,
+    id: str,
+    filename: str,
+    purpose: str,
+    bytes: int,
+    created_at: int,
+    expires_at: int,
+    **kwargs: Any,  # here to ignore any additional fields, e.g. extra fields from AuthorizedSqlStore
+) -> OpenAIFileObject:
+    """
+    Construct an OpenAIFileObject and normalize expires_at.
+
+    If expires_at is greater than the max we treat it as no-expiration and
+    return None for expires_at.
+
+    The OpenAI spec says expires_at type is Integer, but the implementation
+    will return None for no expiration.
+    """
+    obj = OpenAIFileObject(
+        id=id,
+        filename=filename,
+        purpose=OpenAIFilePurpose(purpose),
+        bytes=bytes,
+        created_at=created_at,
+        expires_at=expires_at,
+    )
+
+    if obj.expires_at is not None and obj.expires_at > (obj.created_at + ExpiresAfter.MAX):
+        obj.expires_at = None  # type: ignore
+
+    return obj
+
+
 class S3FilesImpl(Files):
    """S3-based implementation of the Files API."""

-    # TODO: implement expiration, for now a silly offset
-    _SILLY_EXPIRATION_OFFSET = 100 * 365 * 24 * 60 * 60
-
-    def __init__(self, config: S3FilesImplConfig) -> None:
+    def __init__(self, config: S3FilesImplConfig, policy: list[AccessRule]) -> None:
        self._config = config
+        self.policy = policy
        self._client: boto3.client | None = None
-        self._sql_store: SqlStore | None = None
+        self._sql_store: AuthorizedSqlStore | None = None
+
+    def _now(self) -> int:
+        """Return current UTC timestamp as int seconds."""
+        return int(datetime.now(UTC).timestamp())
+
+    async def _get_file(self, file_id: str, return_expired: bool = False) -> dict[str, Any]:
+        where: dict[str, str | dict] = {"id": file_id}
+        if not return_expired:
+            where["expires_at"] = {">": self._now()}
+        if not (row := await self.sql_store.fetch_one("openai_files", policy=self.policy, where=where)):
+            raise ResourceNotFoundError(file_id, "File", "files.list()")
+        return row
+
+    async def _delete_file(self, file_id: str) -> None:
+        """Delete a file from S3 and the database."""
+        try:
+            self.client.delete_object(
+                Bucket=self._config.bucket_name,
+                Key=file_id,
+            )
+        except ClientError as e:
+            if e.response["Error"]["Code"] != "NoSuchKey":
+                raise RuntimeError(f"Failed to delete file from S3: {e}") from e
+
+        await self.sql_store.delete("openai_files", where={"id": file_id})
+
+    async def _delete_if_expired(self, file_id: str) -> None:
+        """If the file exists and is expired, delete it."""
+        if row := await self._get_file(file_id, return_expired=True):
+            if (expires_at := row.get("expires_at")) and expires_at <= self._now():
+                await self._delete_file(file_id)

    async def initialize(self) -> None:
        self._client = _create_s3_client(self._config)
        await _create_bucket_if_not_exists(self._client, self._config)

-        self._sql_store = sqlstore_impl(self._config.metadata_store)
+        self._sql_store = AuthorizedSqlStore(sqlstore_impl(self._config.metadata_store))
        await self._sql_store.create_table(
            "openai_files",
            {
@ -121,7 +187,7 @@ class S3FilesImpl(Files):
        return self._client

    @property
-    def sql_store(self) -> SqlStore:
+    def sql_store(self) -> AuthorizedSqlStore:
        assert self._sql_store is not None, "Provider not initialized"
        return self._sql_store

@ -129,27 +195,47 @@ class S3FilesImpl(Files):
        self,
        file: Annotated[UploadFile, File()],
        purpose: Annotated[OpenAIFilePurpose, Form()],
+        expires_after_anchor: Annotated[str | None, Form(alias="expires_after[anchor]")] = None,
+        expires_after_seconds: Annotated[int | None, Form(alias="expires_after[seconds]")] = None,
    ) -> OpenAIFileObject:
        file_id = f"file-{uuid.uuid4().hex}"

        filename = getattr(file, "filename", None) or "uploaded_file"

-        created_at = int(time.time())
-        expires_at = created_at + self._SILLY_EXPIRATION_OFFSET
+        created_at = self._now()
+
+        expires_after = None
+        if expires_after_anchor is not None or expires_after_seconds is not None:
+            # we use ExpiresAfter to validate input
+            expires_after = ExpiresAfter(
+                anchor=expires_after_anchor,  # type: ignore[arg-type]
+                seconds=expires_after_seconds,  # type: ignore[arg-type]
+            )
+
+        # the default is no expiration.
+        # to implement no expiration we set an expiration beyond the max.
+        # we'll hide this fact from users when returning the file object.
+        expires_at = created_at + ExpiresAfter.MAX * 42
+        # the default for BATCH files is 30 days, which happens to be the expiration max.
+        if purpose == OpenAIFilePurpose.BATCH:
+            expires_at = created_at + ExpiresAfter.MAX
+
+        if expires_after is not None:
+            expires_at = created_at + expires_after.seconds
+
        content = await file.read()
        file_size = len(content)

-        await self.sql_store.insert(
-            "openai_files",
-            {
+        entry: dict[str, Any] = {
            "id": file_id,
            "filename": filename,
            "purpose": purpose.value,
            "bytes": file_size,
            "created_at": created_at,
            "expires_at": expires_at,
-            },
-        )
+        }
+
+        await self.sql_store.insert("openai_files", entry)

        try:
            self.client.put_object(
@ -163,14 +249,7 @@ class S3FilesImpl(Files):

            raise RuntimeError(f"Failed to upload file to S3: {e}") from e

-        return OpenAIFileObject(
-            id=file_id,
-            filename=filename,
-            purpose=purpose,
-            bytes=file_size,
-            created_at=created_at,
-            expires_at=expires_at,
-        )
+        return _make_file_object(**entry)

    async def openai_list_files(
        self,
@ -183,29 +262,20 @@ class S3FilesImpl(Files):
        if not order:
            order = Order.desc

-        where_conditions = {}
+        where_conditions: dict[str, Any] = {"expires_at": {">": self._now()}}
        if purpose:
            where_conditions["purpose"] = purpose.value

        paginated_result = await self.sql_store.fetch_all(
            table="openai_files",
-            where=where_conditions if where_conditions else None,
+            policy=self.policy,
+            where=where_conditions,
            order_by=[("created_at", order.value)],
            cursor=("id", after) if after else None,
            limit=limit,
        )

-        files = [
-            OpenAIFileObject(
-                id=row["id"],
-                filename=row["filename"],
-                purpose=OpenAIFilePurpose(row["purpose"]),
-                bytes=row["bytes"],
-                created_at=row["created_at"],
-                expires_at=row["expires_at"],
-            )
-            for row in paginated_result.data
-        ]
+        files = [_make_file_object(**row) for row in paginated_result.data]

        return ListOpenAIFileResponse(
            data=files,
@ -216,41 +286,20 @@ class S3FilesImpl(Files):
        )

    async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject:
-        row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
-        if not row:
-            raise ResourceNotFoundError(file_id, "File", "files.list()")
-
-        return OpenAIFileObject(
-            id=row["id"],
-            filename=row["filename"],
-            purpose=OpenAIFilePurpose(row["purpose"]),
-            bytes=row["bytes"],
-            created_at=row["created_at"],
-            expires_at=row["expires_at"],
-        )
+        await self._delete_if_expired(file_id)
+        row = await self._get_file(file_id)
+        return _make_file_object(**row)

    async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse:
-        row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
-        if not row:
-            raise ResourceNotFoundError(file_id, "File", "files.list()")
-
-        try:
-            self.client.delete_object(
-                Bucket=self._config.bucket_name,
-                Key=row["id"],
-            )
-        except ClientError as e:
-            if e.response["Error"]["Code"] != "NoSuchKey":
-                raise RuntimeError(f"Failed to delete file from S3: {e}") from e
-
-        await self.sql_store.delete("openai_files", where={"id": file_id})
-
+        await self._delete_if_expired(file_id)
+        _ = await self._get_file(file_id)  # raises if not found
+        await self._delete_file(file_id)
        return OpenAIFileDeleteResponse(id=file_id, deleted=True)

    async def openai_retrieve_file_content(self, file_id: str) -> Response:
-        row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
-        if not row:
-            raise ResourceNotFoundError(file_id, "File", "files.list()")
+        await self._delete_if_expired(file_id)
+
+        row = await self._get_file(file_id)

        try:
            response = self.client.get_object(
@ -261,7 +310,7 @@ class S3FilesImpl(Files):
            content = response["Body"].read()
        except ClientError as e:
            if e.response["Error"]["Code"] == "NoSuchKey":
-                await self.sql_store.delete("openai_files", where={"id": file_id})
+                await self._delete_file(file_id)
                raise ResourceNotFoundError(file_id, "File", "files.list()") from e
            raise RuntimeError(f"Failed to download file from S3: {e}") from e

--- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
+++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
@ -41,10 +41,10 @@ client.initialize()

 ### Create Completion

-> Note on Completion API
->
-> The hosted NVIDIA Llama NIMs (e.g., `meta-llama/Llama-3.1-8B-Instruct`) with ```NVIDIA_BASE_URL="https://integrate.api.nvidia.com"``` does not support the ```completion``` method, while the locally deployed NIM does.
+The following example shows how to create a completion for an NVIDIA NIM.

+> [!NOTE]
+> The hosted NVIDIA Llama NIMs (for example ```meta-llama/Llama-3.1-8B-Instruct```) that have ```NVIDIA_BASE_URL="https://integrate.api.nvidia.com"``` do not support the ```completion``` method, while locally deployed NIMs do.

 ```python
 response = client.inference.completion(
@ -60,6 +60,8 @@ print(f"Response: {response.content}")

 ### Create Chat Completion

+The following example shows how to create a chat completion for an NVIDIA NIM.
+
 ```python
 response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",
@ -82,6 +84,9 @@ print(f"Response: {response.completion_message.content}")
 ```

 ### Tool Calling Example ###
+
+The following example shows how to use tool calling with an NVIDIA NIM.
+
 ```python
 from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition

@ -117,6 +122,9 @@ if tool_response.completion_message.tool_calls:
 ```

 ### Structured Output Example
+
+The following example shows how to request structured output from an NVIDIA NIM.
+
 ```python
 from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType

@ -149,8 +157,10 @@ print(f"Structured Response: {structured_response.completion_message.content}")
 ```

 ### Create Embeddings
-> Note on OpenAI embeddings compatibility
->
+
+The following example shows how to create embeddings for an NVIDIA NIM.
+
+> [!NOTE]
 > NVIDIA asymmetric embedding models (e.g., `nvidia/llama-3.2-nv-embedqa-1b-v2`) require an `input_type` parameter not present in the standard OpenAI embeddings API. The NVIDIA Inference Adapter automatically sets `input_type="query"` when using the OpenAI-compatible embeddings endpoint for NVIDIA. For passage embeddings, use the `embeddings` API with `task_type="document"`.

 ```python
@ -161,3 +171,41 @@ response = client.inference.embeddings(
 )
 print(f"Embeddings: {response.embeddings}")
 ```
+
+### Vision Language Models Example
+
+The following example shows how to run vision inference by using an NVIDIA NIM.
+
+```python
+def load_image_as_base64(image_path):
+    with open(image_path, "rb") as image_file:
+        img_bytes = image_file.read()
+        return base64.b64encode(img_bytes).decode("utf-8")
+
+
+image_path = {path_to_the_image}
+demo_image_b64 = load_image_as_base64(image_path)
+
+vlm_response = client.inference.chat_completion(
+    model_id="nvidia/vila",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image",
+                    "image": {
+                        "data": demo_image_b64,
+                    },
+                },
+                {
+                    "type": "text",
+                    "text": "Please describe what you see in this image in detail.",
+                },
+            ],
+        }
+    ],
+)
+
+print(f"VLM Response: {vlm_response.completion_message.content}")
+```
--- a/llama_stack/providers/remote/inference/nvidia/models.py
+++ b/llama_stack/providers/remote/inference/nvidia/models.py
@ -55,6 +55,10 @@ MODEL_ENTRIES = [
        "meta/llama-3.3-70b-instruct",
        CoreModelId.llama3_3_70b_instruct.value,
    ),
+    ProviderModelEntry(
+        provider_model_id="nvidia/vila",
+        model_type=ModelType.llm,
+    ),
    # NeMo Retriever Text Embedding models -
    #
    # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@ -118,10 +118,10 @@ class OllamaInferenceAdapter(

    async def initialize(self) -> None:
        logger.info(f"checking connectivity to Ollama at `{self.config.url}`...")
-        health_response = await self.health()
-        if health_response["status"] == HealthStatus.ERROR:
+        r = await self.health()
+        if r["status"] == HealthStatus.ERROR:
            logger.warning(
-                "Ollama Server is not running, make sure to start it using `ollama serve` in a separate terminal"
+                f"Ollama Server is not running (message: {r['message']}). Make sure to start it using `ollama serve` in a separate terminal"
            )

    async def should_refresh_models(self) -> bool:
@ -156,7 +156,7 @@ class OllamaInferenceAdapter(
            ),
            Model(
                identifier="nomic-embed-text",
-                provider_resource_id="nomic-embed-text",
+                provider_resource_id="nomic-embed-text:latest",
                provider_id=provider_id,
                metadata={
                    "embedding_dimension": 768,
--- a/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py
@ -7,8 +7,8 @@
 from collections.abc import AsyncGenerator, AsyncIterator
 from typing import Any

-from ibm_watson_machine_learning.foundation_models import Model
-from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
+from ibm_watsonx_ai.foundation_models import Model
+from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
 from openai import AsyncOpenAI

 from llama_stack.apis.common.content_types import InterleavedContent, InterleavedContentItem
--- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
+++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import heapq
 from typing import Any

 import psycopg2
@ -23,6 +24,9 @@ from llama_stack.apis.vector_io import (
 )
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@ -31,6 +35,7 @@ from llama_stack.providers.utils.memory.vector_store import (
    EmbeddingIndex,
    VectorDBWithIndex,
 )
+from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name

 from .config import PGVectorVectorIOConfig

@ -72,26 +77,64 @@ def load_models(cur, cls):


 class PGVectorIndex(EmbeddingIndex):
-    def __init__(self, vector_db: VectorDB, dimension: int, conn, kvstore: KVStore | None = None):
+    # reference: https://github.com/pgvector/pgvector?tab=readme-ov-file#querying
+    PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION: dict[str, str] = {
+        "L2": "<->",
+        "L1": "<+>",
+        "COSINE": "<=>",
+        "INNER_PRODUCT": "<#>",
+        "HAMMING": "<~>",
+        "JACCARD": "<%>",
+    }
+
+    def __init__(
+        self,
+        vector_db: VectorDB,
+        dimension: int,
+        conn: psycopg2.extensions.connection,
+        kvstore: KVStore | None = None,
+        distance_metric: str = "COSINE",
+    ):
+        self.vector_db = vector_db
+        self.dimension = dimension
        self.conn = conn
-        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        self.kvstore = kvstore
+        self.check_distance_metric_availability(distance_metric)
+        self.distance_metric = distance_metric
+        self.table_name = None
+
+    async def initialize(self) -> None:
+        try:
+            with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                # Sanitize the table name by replacing hyphens with underscores
                # SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens
                # when created with patterns like "test-vector-db-{uuid4()}"
-            sanitized_identifier = vector_db.identifier.replace("-", "_")
-            self.table_name = f"vector_store_{sanitized_identifier}"
-            self.kvstore = kvstore
+                sanitized_identifier = sanitize_collection_name(self.vector_db.identifier)
+                self.table_name = f"vs_{sanitized_identifier}"

                cur.execute(
                    f"""
                    CREATE TABLE IF NOT EXISTS {self.table_name} (
                        id TEXT PRIMARY KEY,
                        document JSONB,
-                    embedding vector({dimension})
+                        embedding vector({self.dimension}),
+                        content_text TEXT,
+                        tokenized_content TSVECTOR
                    )
                """
                )

+                # Create GIN index for full-text search performance
+                cur.execute(
+                    f"""
+                    CREATE INDEX IF NOT EXISTS {self.table_name}_content_gin_idx
+                    ON {self.table_name} USING GIN(tokenized_content)
+                """
+                )
+        except Exception as e:
+            log.exception(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}")
+            raise RuntimeError(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") from e
+
    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
        assert len(chunks) == len(embeddings), (
            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
@ -99,29 +142,49 @@ class PGVectorIndex(EmbeddingIndex):

        values = []
        for i, chunk in enumerate(chunks):
+            content_text = interleaved_content_as_str(chunk.content)
            values.append(
                (
                    f"{chunk.chunk_id}",
                    Json(chunk.model_dump()),
                    embeddings[i].tolist(),
+                    content_text,
+                    content_text,  # Pass content_text twice - once for content_text column, once for to_tsvector function. Eg. to_tsvector(content_text) = tokenized_content
                )
            )

        query = sql.SQL(
            f"""
-        INSERT INTO {self.table_name} (id, document, embedding)
+        INSERT INTO {self.table_name} (id, document, embedding, content_text, tokenized_content)
        VALUES %s
-        ON CONFLICT (id) DO UPDATE SET embedding = EXCLUDED.embedding, document = EXCLUDED.document
+        ON CONFLICT (id) DO UPDATE SET
+            embedding = EXCLUDED.embedding,
+            document = EXCLUDED.document,
+            content_text = EXCLUDED.content_text,
+            tokenized_content = EXCLUDED.tokenized_content
    """
        )
        with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-            execute_values(cur, query, values, template="(%s, %s, %s::vector)")
+            execute_values(cur, query, values, template="(%s, %s, %s::vector, %s, to_tsvector('english', %s))")

    async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
+        """
+        Performs vector similarity search using the pgvector distance operator for this index's distance metric (default: COSINE).
+
+        Args:
+            embedding: The query embedding vector
+            k: Number of results to return
+            score_threshold: Minimum similarity score threshold
+
+        Returns:
+            QueryChunksResponse with the vector search results
+        """
+        pgvector_search_function = self.get_pgvector_search_function()
+
        with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            cur.execute(
                f"""
-            SELECT document, embedding <-> %s::vector AS distance
+            SELECT document, embedding {pgvector_search_function} %s::vector AS distance
            FROM {self.table_name}
            ORDER BY distance
            LIMIT %s
@ -147,7 +210,40 @@ class PGVectorIndex(EmbeddingIndex):
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
-        raise NotImplementedError("Keyword search is not supported in PGVector")
+        """
+        Performs keyword-based search using PostgreSQL's full-text search with ts_rank scoring.
+
+        Args:
+            query_string: The text query for keyword search
+            k: Number of results to return
+            score_threshold: Minimum ts_rank score; results scoring below it are dropped
+
+        Returns:
+            QueryChunksResponse with the matching chunks and their ts_rank scores
+        """
+        with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+            # Use plainto_tsquery to handle user input safely and ts_rank for relevance scoring
+            cur.execute(
+                f"""
+            SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score
+            FROM {self.table_name}
+            WHERE tokenized_content @@ plainto_tsquery('english', %s)
+            ORDER BY score DESC
+            LIMIT %s
+        """,
+                (query_string, query_string, k),
+            )
+            results = cur.fetchall()
+
+            chunks = []
+            scores = []
+            for doc, score in results:
+                if score < score_threshold:
+                    continue
+                chunks.append(Chunk(**doc))
+                scores.append(float(score))
+
+            return QueryChunksResponse(chunks=chunks, scores=scores)

    async def query_hybrid(
        self,
@ -158,7 +254,59 @@ class PGVectorIndex(EmbeddingIndex):
        reranker_type: str,
        reranker_params: dict[str, Any] | None = None,
    ) -> QueryChunksResponse:
-        raise NotImplementedError("Hybrid search is not supported in PGVector")
+        """
+        Hybrid search combining vector similarity and keyword search using configurable reranking.
+
+        Args:
+            embedding: The query embedding vector
+            query_string: The text query for keyword search
+            k: Number of results to return
+            score_threshold: Minimum score threshold, applied to each underlying search and again to the combined score
+            reranker_type: Type of reranker to use ("rrf" or "weighted")
+            reranker_params: Parameters for the reranker
+
+        Returns:
+            QueryChunksResponse with combined results
+        """
+        if reranker_params is None:
+            reranker_params = {}
+
+        # Get results from both search methods
+        vector_response = await self.query_vector(embedding, k, score_threshold)
+        keyword_response = await self.query_keyword(query_string, k, score_threshold)
+
+        # Convert responses to score dictionaries using chunk_id
+        vector_scores = {
+            chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
+        }
+        keyword_scores = {
+            chunk.chunk_id: score
+            for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
+        }
+
+        # Combine scores using the reranking utility
+        combined_scores = WeightedInMemoryAggregator.combine_search_results(
+            vector_scores, keyword_scores, reranker_type, reranker_params
+        )
+
+        # Efficient top-k selection because it only tracks the k best candidates it's seen so far
+        top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1])
+
+        # Filter by score threshold
+        filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
+
+        # Create a map of chunk_id to chunk for both responses
+        chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
+
+        # Use the map to look up chunks by their IDs
+        chunks = []
+        scores = []
+        for doc_id, score in filtered_items:
+            if doc_id in chunk_map:
+                chunks.append(chunk_map[doc_id])
+                scores.append(score)
+
+        return QueryChunksResponse(chunks=chunks, scores=scores)

    async def delete(self):
        with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
@ -170,6 +318,25 @@ class PGVectorIndex(EmbeddingIndex):
        with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            cur.execute(f"DELETE FROM {self.table_name} WHERE id = ANY(%s)", (chunk_ids,))

+    def get_pgvector_search_function(self) -> str:
+        return self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION[self.distance_metric]
+
+    def check_distance_metric_availability(self, distance_metric: str) -> None:
+        """Check if the distance metric is supported by PGVector.
+
+        Args:
+            distance_metric: The distance metric to check
+
+        Raises:
+            ValueError: If the distance metric is not supported
+        """
+        if distance_metric not in self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION:
+            supported_metrics = list(self.PGVECTOR_DISTANCE_METRIC_TO_SEARCH_FUNCTION.keys())
+            raise ValueError(
+                f"Distance metric '{distance_metric}' is not supported by PGVector. "
+                f"Supported metrics are: {', '.join(supported_metrics)}"
+            )
+

 class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
    def __init__(
@ -185,8 +352,8 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
        self.files_api = files_api
        self.kvstore: KVStore | None = None
        self.vector_db_store = None
-        self.openai_vector_store: dict[str, dict[str, Any]] = {}
-        self.metadatadata_collection_name = "openai_vector_stores_metadata"
+        self.openai_vector_stores: dict[str, dict[str, Any]] = {}
+        self.metadata_collection_name = "openai_vector_stores_metadata"

    async def initialize(self) -> None:
        log.info(f"Initializing PGVector memory adapter with config: {self.config}")
@ -233,9 +400,13 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
        upsert_models(self.conn, [(vector_db.identifier, vector_db)])

        # Create and cache the PGVector index table for the vector DB
+        pgvector_index = PGVectorIndex(
+            vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore
+        )
+        await pgvector_index.initialize()
        index = VectorDBWithIndex(
            vector_db,
-            index=PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn, kvstore=self.kvstore),
+            index=pgvector_index,
            inference_api=self.inference_api,
        )
        self.cache[vector_db.identifier] = index
@ -272,8 +443,15 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
        if vector_db_id in self.cache:
            return self.cache[vector_db_id]

+        if self.vector_db_store is None:
+            raise VectorStoreNotFoundError(vector_db_id)
+
        vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
+        if not vector_db:
+            raise VectorStoreNotFoundError(vector_db_id)
+
        index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
+        await index.initialize()
        self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
        return self.cache[vector_db_id]

--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@ -294,12 +294,12 @@ class VectorDBWithIndex:
                _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension)

        if chunks_to_embed:
-            resp = await self.inference_api.embeddings(
+            resp = await self.inference_api.openai_embeddings(
                self.vector_db.embedding_model,
                [c.content for c in chunks_to_embed],
            )
-            for c, embedding in zip(chunks_to_embed, resp.embeddings, strict=False):
-                c.embedding = embedding
+            for c, data in zip(chunks_to_embed, resp.data, strict=False):
+                c.embedding = data.embedding

        embeddings = np.array([c.embedding for c in chunks], dtype=np.float32)
        await self.index.add_chunks(chunks, embeddings)
@ -334,8 +334,8 @@ class VectorDBWithIndex:
        if mode == "keyword":
            return await self.index.query_keyword(query_string, k, score_threshold)

-        embeddings_response = await self.inference_api.embeddings(self.vector_db.embedding_model, [query_string])
-        query_vector = np.array(embeddings_response.embeddings[0], dtype=np.float32)
+        embeddings_response = await self.inference_api.openai_embeddings(self.vector_db.embedding_model, [query_string])
+        query_vector = np.array(embeddings_response.data[0].embedding, dtype=np.float32)
        if mode == "hybrid":
            return await self.index.query_hybrid(
                query_vector, query_string, k, score_threshold, reranker_type, reranker_params
--- a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
+++ b/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
@ -23,6 +23,7 @@ from sqlalchemy import (
 )
 from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
 from sqlalchemy.ext.asyncio.engine import AsyncEngine
+from sqlalchemy.sql.elements import ColumnElement

 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.log import get_logger
@ -43,6 +44,30 @@ TYPE_MAPPING: dict[ColumnType, Any] = {
 }


+def _build_where_expr(column: ColumnElement, value: Any) -> ColumnElement:
+    """Return a SQLAlchemy expression for a where condition.
+
+    `value` may be a simple scalar (equality) or a mapping like {">": 123}.
+    The returned expression is a SQLAlchemy ColumnElement usable in query.where(...).
+    """
+    if isinstance(value, Mapping):
+        if len(value) != 1:
+            raise ValueError(f"Operator mapping must have a single operator, got: {value}")
+        op, operand = next(iter(value.items()))
+        if op == "==" or op == "=":
+            return column == operand
+        if op == ">":
+            return column > operand
+        if op == "<":
+            return column < operand
+        if op == ">=":
+            return column >= operand
+        if op == "<=":
+            return column <= operand
+        raise ValueError(f"Unsupported operator '{op}' in where mapping")
+    return column == value
+
+
 class SqlAlchemySqlStoreImpl(SqlStore):
    def __init__(self, config: SqlAlchemySqlStoreConfig):
        self.config = config
@ -111,7 +136,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):

            if where:
                for key, value in where.items():
-                    query = query.where(table_obj.c[key] == value)
+                    query = query.where(_build_where_expr(table_obj.c[key], value))

            if where_sql:
                query = query.where(text(where_sql))
@ -222,7 +247,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
        async with self.async_session() as session:
            stmt = self.metadata.tables[table].update()
            for key, value in where.items():
-                stmt = stmt.where(self.metadata.tables[table].c[key] == value)
+                stmt = stmt.where(_build_where_expr(self.metadata.tables[table].c[key], value))
            await session.execute(stmt, data)
            await session.commit()

@ -233,7 +258,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
        async with self.async_session() as session:
            stmt = self.metadata.tables[table].delete()
            for key, value in where.items():
-                stmt = stmt.where(self.metadata.tables[table].c[key] == value)
+                stmt = stmt.where(_build_where_expr(self.metadata.tables[table].c[key], value))
            await session.execute(stmt)
            await session.commit()

--- a/llama_stack/providers/utils/vector_io/vector_utils.py
+++ b/llama_stack/providers/utils/vector_io/vector_utils.py
@ -37,3 +37,122 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str:
    else:
        s = proper_case(re.sub(r"[^a-zA-Z0-9]", "", name))
    return s
+
+
+class WeightedInMemoryAggregator:
+    @staticmethod
+    def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
+        """
+        Normalize scores to 0-1 range using min-max normalization.
+
+        Args:
+            scores: dictionary of scores with document IDs as keys and scores as values
+
+        Returns:
+            Normalized scores with document IDs as keys and normalized scores as values
+        """
+        if not scores:
+            return {}
+        min_score, max_score = min(scores.values()), max(scores.values())
+        score_range = max_score - min_score
+        if score_range > 0:
+            return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()}
+        return dict.fromkeys(scores, 1.0)
+
+    @staticmethod
+    def weighted_rerank(
+        vector_scores: dict[str, float],
+        keyword_scores: dict[str, float],
+        alpha: float = 0.5,
+    ) -> dict[str, float]:
+        """
+        Rerank via weighted average of scores.
+
+        Args:
+            vector_scores: scores from vector search
+            keyword_scores: scores from keyword search
+            alpha: weight factor between 0 and 1 (default: 0.5)
+                   0 = keyword only, 1 = vector only, 0.5 = equal weight
+
+        Returns:
+            All unique document IDs with weighted combined scores
+        """
+        all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
+        normalized_vector_scores = WeightedInMemoryAggregator._normalize_scores(vector_scores)
+        normalized_keyword_scores = WeightedInMemoryAggregator._normalize_scores(keyword_scores)
+
+        # Weighted formula: score = (1-alpha) * keyword_score + alpha * vector_score
+        # alpha=0 means keyword only, alpha=1 means vector only
+        return {
+            doc_id: ((1 - alpha) * normalized_keyword_scores.get(doc_id, 0.0))
+            + (alpha * normalized_vector_scores.get(doc_id, 0.0))
+            for doc_id in all_ids
+        }
+
+    @staticmethod
+    def rrf_rerank(
+        vector_scores: dict[str, float],
+        keyword_scores: dict[str, float],
+        impact_factor: float = 60.0,
+    ) -> dict[str, float]:
+        """
+        Rerank via Reciprocal Rank Fusion.
+
+        Args:
+            vector_scores: scores from vector search
+            keyword_scores: scores from keyword search
+            impact_factor: impact factor for RRF (default: 60.0)
+
+        Returns:
+            All unique document IDs with RRF combined scores
+        """
+
+        # Convert scores to ranks
+        vector_ranks = {
+            doc_id: i + 1
+            for i, (doc_id, _) in enumerate(sorted(vector_scores.items(), key=lambda x: x[1], reverse=True))
+        }
+        keyword_ranks = {
+            doc_id: i + 1
+            for i, (doc_id, _) in enumerate(sorted(keyword_scores.items(), key=lambda x: x[1], reverse=True))
+        }
+
+        all_ids = set(vector_scores.keys()) | set(keyword_scores.keys())
+        rrf_scores = {}
+        for doc_id in all_ids:
+            vector_rank = vector_ranks.get(doc_id, float("inf"))
+            keyword_rank = keyword_ranks.get(doc_id, float("inf"))
+
+            # RRF formula: score = sum over both rankings of 1/(k + r), where k is impact_factor and r is the rank (r = inf if absent, contributing 0)
+            rrf_scores[doc_id] = (1.0 / (impact_factor + vector_rank)) + (1.0 / (impact_factor + keyword_rank))
+        return rrf_scores
+
+    @staticmethod
+    def combine_search_results(
+        vector_scores: dict[str, float],
+        keyword_scores: dict[str, float],
+        reranker_type: str = "rrf",
+        reranker_params: dict[str, float] | None = None,
+    ) -> dict[str, float]:
+        """
+        Combine vector and keyword search results using specified reranking strategy.
+
+        Args:
+            vector_scores: scores from vector search
+            keyword_scores: scores from keyword search
+            reranker_type: type of reranker to use, "rrf" or "weighted" (default: "rrf")
+            reranker_params: parameters for the reranker
+
+        Returns:
+            All unique document IDs with combined scores
+        """
+        if reranker_params is None:
+            reranker_params = {}
+
+        if reranker_type == "weighted":
+            alpha = reranker_params.get("alpha", 0.5)
+            return WeightedInMemoryAggregator.weighted_rerank(vector_scores, keyword_scores, alpha)
+        else:
+            # Default to RRF for None, RRF, or any unknown types
+            impact_factor = reranker_params.get("impact_factor", 60.0)
+            return WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor)
--- a/llama_stack/testing/inference_recorder.py
+++ b/llama_stack/testing/inference_recorder.py
@ -30,6 +30,9 @@ from openai.types.completion_choice import CompletionChoice
 CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
 CompletionChoice.model_rebuild()

+REPO_ROOT = Path(__file__).parent.parent.parent
+DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/recordings"
+

 class InferenceMode(StrEnum):
    LIVE = "live"
@ -51,7 +54,7 @@ def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict


 def get_inference_mode() -> InferenceMode:
-    return InferenceMode(os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "live").lower())
+    return InferenceMode(os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "replay").lower())


 def setup_inference_recording():
@ -60,28 +63,18 @@ def setup_inference_recording():
    to increase their reliability and reduce reliance on expensive, external services.

    Currently, this is only supported for OpenAI and Ollama clients. These should cover the vast majority of use cases.
-    Calls to the /models endpoint are not currently trapped. We probably need to add support for this.

-    Two environment variables are required:
-    - LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', or 'replay'.
-    - LLAMA_STACK_TEST_RECORDING_DIR: The directory to store the recordings in.
+    Two environment variables are supported:
+    - LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', or 'replay'. Default is 'replay'.
+    - LLAMA_STACK_TEST_RECORDING_DIR: The directory to store the recordings in. Default is 'tests/integration/recordings'.

-    The recordings are stored in a SQLite database and a JSON file for each request. The SQLite database is used to
-    quickly find the correct recording for a given request. The JSON files are used to store the request and response
-    bodies.
+    The recordings are stored as JSON files.
    """
    mode = get_inference_mode()
-
-    if mode not in InferenceMode:
-        raise ValueError(f"Invalid LLAMA_STACK_TEST_INFERENCE_MODE: {mode}. Must be 'live', 'record', or 'replay'")
-
    if mode == InferenceMode.LIVE:
        return None

-    if "LLAMA_STACK_TEST_RECORDING_DIR" not in os.environ:
-        raise ValueError("LLAMA_STACK_TEST_RECORDING_DIR must be set for recording or replaying")
-    storage_dir = os.environ["LLAMA_STACK_TEST_RECORDING_DIR"]
-
+    storage_dir = os.environ.get("LLAMA_STACK_TEST_RECORDING_DIR", DEFAULT_STORAGE_DIR)
    return inference_recording(mode=mode, storage_dir=storage_dir)


@ -134,8 +127,8 @@ class ResponseStorage:
    def store_recording(self, request_hash: str, request: dict[str, Any], response: dict[str, Any]):
        """Store a request/response pair."""
        # Generate unique response filename
-        response_file = f"{request_hash[:12]}.json"
-        response_path = self.responses_dir / response_file
+        short_hash = request_hash[:12]
+        response_file = f"{short_hash}.json"

        # Serialize response body if needed
        serialized_response = dict(response)
@ -147,6 +140,14 @@ class ResponseStorage:
                # Handle single response
                serialized_response["body"] = _serialize_response(serialized_response["body"])

+        # If this is an Ollama /api/tags recording, include models digest in filename to distinguish variants
+        endpoint = request.get("endpoint")
+        if endpoint in ("/api/tags", "/v1/models"):
+            digest = _model_identifiers_digest(endpoint, response)
+            response_file = f"models-{short_hash}-{digest}.json"
+
+        response_path = self.responses_dir / response_file
+
        # Save response to JSON file
        with open(response_path, "w") as f:
            json.dump({"request": request, "response": serialized_response}, f, indent=2)
@ -161,6 +162,17 @@ class ResponseStorage:
        if not response_path.exists():
            return None

+        return _recording_from_file(response_path)
+
+    def _model_list_responses(self, short_hash: str) -> list[dict[str, Any]]:
+        """Load every stored model-list recording whose filename carries ``short_hash``.
+
+        Matches files named ``models-{short_hash}-*.json`` inside ``self.responses_dir``.
+        The paths are sorted because ``Path.glob`` yields entries in arbitrary order;
+        a stable order keeps the combined replay result deterministic.
+
+        Args:
+            short_hash: Truncated request hash embedded in the recording filenames.
+
+        Returns:
+            The parsed recordings ordered by file path (empty when nothing matches).
+        """
+        matches = sorted(self.responses_dir.glob(f"models-{short_hash}-*.json"))
+        return [_recording_from_file(path) for path in matches]
+
+
+def _recording_from_file(response_path) -> dict[str, Any]:
    with open(response_path) as f:
        data = json.load(f)

@ -176,6 +188,61 @@ class ResponseStorage:
    return cast(dict[str, Any], data)


+def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
+    """Return a short, stable digest of the model identifiers in a model-list response.
+
+    Supported endpoints:
+    - '/api/tags' (Ollama): entries are read from the body's 'models' key and
+      identified by their ``model`` attribute.
+    - any other endpoint is handled like '/v1/models' (OpenAI): entries are read
+      from the body's 'data' key and identified by their ``id`` attribute.
+
+    Args:
+        endpoint: The model-list endpoint the response was produced by.
+        response: Mapping whose "body" entry supports ``.get(key)``.
+
+    Returns:
+        The first 8 hex characters of the SHA-1 of the unique identifiers,
+        sorted and joined with '|'.
+    """
+    body = response["body"]
+    # Pick which key holds the entries and which attribute names each one.
+    list_key, ident_attr = ("models", "model") if endpoint == "/api/tags" else ("data", "id")
+    unique_sorted = sorted({getattr(entry, ident_attr) for entry in body.get(list_key)})
+    joined = "|".join(unique_sorted)
+    return hashlib.sha1(joined.encode("utf-8")).hexdigest()[:8]
+
+
+def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) -> dict[str, Any] | None:
+    """Return a single, unioned recording for supported model-list endpoints.
+
+    Args:
+        endpoint: '/api/tags' (Ollama) or '/v1/models' (OpenAI). Any other value
+            contributes no models.
+        records: Recordings shaped like ``{"request": ..., "response": {"body": ...}}``
+            where the body exposes ``.models`` (Ollama) or ``.data`` (OpenAI).
+
+    Returns:
+        A recording whose body lists every distinct model found across ``records``,
+        sorted by identifier, or None when ``records`` is empty.
+    """
+    if not records:
+        # Nothing recorded for this request: report "no recording" to the caller
+        # (which checks the result for truthiness) instead of raising IndexError
+        # on records[0] below.
+        return None
+
+    is_openai = endpoint == "/v1/models"
+
+    # Identifier -> model entry; when several recordings list the same model,
+    # the entry from the later recording wins.
+    seen: dict[str, Any] = {}
+    for rec in records:
+        body = rec["response"]["body"]
+        if endpoint == "/api/tags":
+            items = body.models
+        elif is_openai:
+            items = body.data
+        else:
+            items = []
+
+        for m in items:
+            seen[m.id if is_openai else m.model] = m
+
+    ordered = [seen[k] for k in sorted(seen)]
+
+    # Reuse the first recording's request as the canonical one for the union.
+    canonical_req = records[0].get("request", {})
+    if isinstance(canonical_req, dict):
+        canonical_req["endpoint"] = endpoint
+
+    if is_openai:
+        body = {"data": ordered, "object": "list"}
+    else:
+        # Imported lazily so the OpenAI path does not depend on the ollama package.
+        from ollama import ListResponse
+
+        body = ListResponse(models=ordered)
+    return {"request": canonical_req, "response": {"body": body, "is_streaming": False}}
+
+
 async def _patched_inference_method(original_method, self, client_type, endpoint, *args, **kwargs):
    global _current_mode, _current_storage

@ -195,8 +262,6 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
        raise ValueError(f"Unknown client type: {client_type}")

    url = base_url.rstrip("/") + endpoint
-
-    # Normalize request for matching
    method = "POST"
    headers = {}
    body = kwargs
@ -204,6 +269,11 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
    request_hash = normalize_request(method, url, headers, body)

    if _current_mode == InferenceMode.REPLAY:
+        # Special handling for model-list endpoints: return union of all responses
+        if endpoint in ("/api/tags", "/v1/models"):
+            records = _current_storage._model_list_responses(request_hash[:12])
+            recording = _combine_model_list_responses(endpoint, records)
+        else:
            recording = _current_storage.find_recording(request_hash)
        if recording:
            response_body = recording["response"]["body"]
@ -274,12 +344,14 @@ def patch_inference_clients():
    from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
    from openai.resources.completions import AsyncCompletions
    from openai.resources.embeddings import AsyncEmbeddings
+    from openai.resources.models import AsyncModels

    # Store original methods for both OpenAI and Ollama clients
    _original_methods = {
        "chat_completions_create": AsyncChatCompletions.create,
        "completions_create": AsyncCompletions.create,
        "embeddings_create": AsyncEmbeddings.create,
+        "models_list": AsyncModels.list,
        "ollama_generate": OllamaAsyncClient.generate,
        "ollama_chat": OllamaAsyncClient.chat,
        "ollama_embed": OllamaAsyncClient.embed,
@ -304,10 +376,16 @@ def patch_inference_clients():
            _original_methods["embeddings_create"], self, "openai", "/v1/embeddings", *args, **kwargs
        )

+    async def patched_models_list(self, *args, **kwargs):
+        """Route the OpenAI models-list call through the record/replay wrapper."""
+        original_list = _original_methods["models_list"]
+        return await _patched_inference_method(original_list, self, "openai", "/v1/models", *args, **kwargs)
+
    # Apply OpenAI patches
    AsyncChatCompletions.create = patched_chat_completions_create
    AsyncCompletions.create = patched_completions_create
    AsyncEmbeddings.create = patched_embeddings_create
+    AsyncModels.list = patched_models_list

    # Create patched methods for Ollama client
    async def patched_ollama_generate(self, *args, **kwargs):
@ -361,11 +439,13 @@ def unpatch_inference_clients():
    from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
    from openai.resources.completions import AsyncCompletions
    from openai.resources.embeddings import AsyncEmbeddings
+    from openai.resources.models import AsyncModels

    # Restore OpenAI client methods
    AsyncChatCompletions.create = _original_methods["chat_completions_create"]
    AsyncCompletions.create = _original_methods["completions_create"]
    AsyncEmbeddings.create = _original_methods["embeddings_create"]
+    AsyncModels.list = _original_methods["models_list"]

    # Restore Ollama client methods if they were patched
    OllamaAsyncClient.generate = _original_methods["ollama_generate"]
@ -379,16 +459,10 @@ def unpatch_inference_clients():


@contextmanager
-def inference_recording(mode: str = "live", storage_dir: str | Path | None = None) -> Generator[None, None, None]:
+def inference_recording(mode: str, storage_dir: str | Path | None = None) -> Generator[None, None, None]:
    """Context manager for inference recording/replaying."""
    global _current_mode, _current_storage

-    # Set defaults
-    if storage_dir is None:
-        storage_dir_path = Path.home() / ".llama" / "recordings"
-    else:
-        storage_dir_path = Path(storage_dir)
-
    # Store previous state
    prev_mode = _current_mode
    prev_storage = _current_storage
@ -397,7 +471,9 @@ def inference_recording(mode: str = "live", storage_dir: str | Path | None = Non
        _current_mode = mode

        if mode in ["record", "replay"]:
-            _current_storage = ResponseStorage(storage_dir_path)
+            if storage_dir is None:
+                raise ValueError("storage_dir is required for record and replay modes")
+            _current_storage = ResponseStorage(Path(storage_dir))
            patch_inference_clients()

        yield
--- a/llama_stack/ui/package-lock.json
+++ b/llama_stack/ui/package-lock.json
@ -14,11 +14,11 @@
        "@radix-ui/react-select": "^2.2.5",
        "@radix-ui/react-separator": "^1.1.7",
        "@radix-ui/react-slot": "^1.2.3",
-        "@radix-ui/react-tooltip": "^1.2.6",
+        "@radix-ui/react-tooltip": "^1.2.8",
        "class-variance-authority": "^0.7.1",
        "clsx": "^2.1.1",
-        "framer-motion": "^11.18.2",
-        "llama-stack-client": "^0.2.19",
+        "framer-motion": "^12.23.12",
+        "llama-stack-client": "^0.2.20",
        "lucide-react": "^0.510.0",
        "next": "15.3.3",
        "next-auth": "^4.24.11",
@ -39,16 +39,16 @@
        "@testing-library/jest-dom": "^6.8.0",
        "@testing-library/react": "^16.3.0",
        "@types/jest": "^29.5.14",
-        "@types/node": "^20",
+        "@types/node": "^24",
        "@types/react": "^19",
        "@types/react-dom": "^19",
        "eslint": "^9",
-        "eslint-config-next": "15.3.2",
+        "eslint-config-next": "15.5.2",
        "eslint-config-prettier": "^10.1.8",
        "eslint-plugin-prettier": "^5.5.4",
        "jest": "^29.7.0",
        "jest-environment-jsdom": "^29.7.0",
-        "prettier": "3.5.3",
+        "prettier": "3.6.2",
        "tailwindcss": "^4",
        "ts-node": "^10.9.2",
        "tw-animate-css": "^1.2.9",
@ -1854,9 +1854,9 @@
      "integrity": "sha512-OdiMrzCl2Xi0VTjiQQUK0Xh7bJHnOuET2s+3V+Y40WJBAXrJeGA3f+I8MZJ/YQ3mVGi5XGR1L66oFlgqXhQ4Vw=="
    },
    "node_modules/@next/eslint-plugin-next": {
-      "version": "15.3.2",
-      "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-15.3.2.tgz",
-      "integrity": "sha512-ijVRTXBgnHT33aWnDtmlG+LJD+5vhc9AKTJPquGG5NKXjpKNjc62woIhFtrAcWdBobt8kqjCoaJ0q6sDQoX7aQ==",
+      "version": "15.5.2",
+      "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-15.5.2.tgz",
+      "integrity": "sha512-lkLrRVxcftuOsJNhWatf1P2hNVfh98k/omQHrCEPPriUypR6RcS13IvLdIrEvkm9AH2Nu2YpR5vLqBuy6twH3Q==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
@ -2861,29 +2861,6 @@
        }
      }
    },
-    "node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-visually-hidden": {
-      "version": "1.2.3",
-      "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz",
-      "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==",
-      "license": "MIT",
-      "dependencies": {
-        "@radix-ui/react-primitive": "2.1.3"
-      },
-      "peerDependencies": {
-        "@types/react": "*",
-        "@types/react-dom": "*",
-        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
-        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
-      },
-      "peerDependenciesMeta": {
-        "@types/react": {
-          "optional": true
-        },
-        "@types/react-dom": {
-          "optional": true
-        }
-      }
-    },
    "node_modules/@radix-ui/react-separator": {
      "version": "1.1.7",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.7.tgz",
@ -2949,23 +2926,23 @@
      }
    },
    "node_modules/@radix-ui/react-tooltip": {
-      "version": "1.2.6",
-      "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.6.tgz",
-      "integrity": "sha512-zYb+9dc9tkoN2JjBDIIPLQtk3gGyz8FMKoqYTb8EMVQ5a5hBcdHPECrsZVI4NpPAUOixhkoqg7Hj5ry5USowfA==",
+      "version": "1.2.8",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz",
+      "integrity": "sha512-tY7sVt1yL9ozIxvmbtN5qtmH2krXcBCfjEiCgKGLqunJHvgvZG2Pcl2oQ3kbcZARb1BGEHdkLzcYGO8ynVlieg==",
      "license": "MIT",
      "dependencies": {
-        "@radix-ui/primitive": "1.1.2",
+        "@radix-ui/primitive": "1.1.3",
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-context": "1.1.2",
-        "@radix-ui/react-dismissable-layer": "1.1.9",
+        "@radix-ui/react-dismissable-layer": "1.1.11",
        "@radix-ui/react-id": "1.1.1",
-        "@radix-ui/react-popper": "1.2.6",
-        "@radix-ui/react-portal": "1.1.8",
-        "@radix-ui/react-presence": "1.1.4",
-        "@radix-ui/react-primitive": "2.1.2",
-        "@radix-ui/react-slot": "1.2.2",
+        "@radix-ui/react-popper": "1.2.8",
+        "@radix-ui/react-portal": "1.1.9",
+        "@radix-ui/react-presence": "1.1.5",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-slot": "1.2.3",
        "@radix-ui/react-use-controllable-state": "1.2.2",
-        "@radix-ui/react-visually-hidden": "1.2.2"
+        "@radix-ui/react-visually-hidden": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
@ -2982,21 +2959,162 @@
        }
      }
    },
-    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-slot": {
-      "version": "1.2.2",
-      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.2.tgz",
-      "integrity": "sha512-y7TBO4xN4Y94FvcWIOIh18fM4R1A8S4q1jhoz4PNzOoHsFcN8pogcFmZrTYAm4F9VRUrWP/Mw7xSKybIeRI+CQ==",
+    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/primitive": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
+      "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
+      "license": "MIT"
+    },
+    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-arrow": {
+      "version": "1.1.7",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
+      "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
      "license": "MIT",
      "dependencies": {
-        "@radix-ui/react-compose-refs": "1.1.2"
+        "@radix-ui/react-primitive": "2.1.3"
      },
      "peerDependencies": {
        "@types/react": "*",
-        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-dismissable-layer": {
+      "version": "1.1.11",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
+      "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/primitive": "1.1.3",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-callback-ref": "1.1.1",
+        "@radix-ui/react-use-escape-keydown": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-popper": {
+      "version": "1.2.8",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
+      "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
+      "license": "MIT",
+      "dependencies": {
+        "@floating-ui/react-dom": "^2.0.0",
+        "@radix-ui/react-arrow": "1.1.7",
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-context": "1.1.2",
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-callback-ref": "1.1.1",
+        "@radix-ui/react-use-layout-effect": "1.1.1",
+        "@radix-ui/react-use-rect": "1.1.1",
+        "@radix-ui/react-use-size": "1.1.1",
+        "@radix-ui/rect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-portal": {
+      "version": "1.1.9",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
+      "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-primitive": "2.1.3",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-presence": {
+      "version": "1.1.5",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz",
+      "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-compose-refs": "1.1.2",
+        "@radix-ui/react-use-layout-effect": "1.1.1"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
        }
      }
    },
@ -3137,12 +3255,35 @@
      }
    },
    "node_modules/@radix-ui/react-visually-hidden": {
-      "version": "1.2.2",
-      "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.2.tgz",
-      "integrity": "sha512-ORCmRUbNiZIv6uV5mhFrhsIKw4UX/N3syZtyqvry61tbGm4JlgQuSn0hk5TwCARsCjkcnuRkSdCE3xfb+ADHew==",
+      "version": "1.2.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz",
+      "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==",
      "license": "MIT",
      "dependencies": {
-        "@radix-ui/react-primitive": "2.1.2"
+        "@radix-ui/react-primitive": "2.1.3"
+      },
+      "peerDependencies": {
+        "@types/react": "*",
+        "@types/react-dom": "*",
+        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
+        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+      },
+      "peerDependenciesMeta": {
+        "@types/react": {
+          "optional": true
+        },
+        "@types/react-dom": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/@radix-ui/react-visually-hidden/node_modules/@radix-ui/react-primitive": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
+      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@radix-ui/react-slot": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
@ -3910,12 +4051,12 @@
      "license": "MIT"
    },
    "node_modules/@types/node": {
-      "version": "20.17.47",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.47.tgz",
-      "integrity": "sha512-3dLX0Upo1v7RvUimvxLeXqwrfyKxUINk0EAM83swP2mlSUcwV73sZy8XhNz8bcZ3VbsfQyC/y6jRdL5tgCNpDQ==",
+      "version": "24.3.0",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.0.tgz",
+      "integrity": "sha512-aPTXCrfwnDLj4VvXrm+UUCQjNEvJgNA8s5F1cvwQU+3KNltTOkBm1j30uNLyqqPNe7gE3KFzImYoZEfLhp4Yow==",
      "license": "MIT",
      "dependencies": {
-        "undici-types": "~6.19.2"
+        "undici-types": "~7.10.0"
      }
    },
    "node_modules/@types/node-fetch": {
@ -6433,13 +6574,13 @@
      }
    },
    "node_modules/eslint-config-next": {
-      "version": "15.3.2",
-      "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-15.3.2.tgz",
-      "integrity": "sha512-FerU4DYccO4FgeYFFglz0SnaKRe1ejXQrDb8kWUkTAg036YWi+jUsgg4sIGNCDhAsDITsZaL4MzBWKB6f4G1Dg==",
+      "version": "15.5.2",
+      "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-15.5.2.tgz",
+      "integrity": "sha512-3hPZghsLupMxxZ2ggjIIrat/bPniM2yRpsVPVM40rp8ZMzKWOJp2CGWn7+EzoV2ddkUr5fxNfHpF+wU1hGt/3g==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@next/eslint-plugin-next": "15.3.2",
+        "@next/eslint-plugin-next": "15.5.2",
        "@rushstack/eslint-patch": "^1.10.3",
        "@typescript-eslint/eslint-plugin": "^5.4.2 || ^6.0.0 || ^7.0.0 || ^8.0.0",
        "@typescript-eslint/parser": "^5.4.2 || ^6.0.0 || ^7.0.0 || ^8.0.0",
@ -7268,13 +7409,13 @@
      }
    },
    "node_modules/framer-motion": {
-      "version": "11.18.2",
-      "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-11.18.2.tgz",
-      "integrity": "sha512-5F5Och7wrvtLVElIpclDT0CBzMVg3dL22B64aZwHtsIY8RB4mXICLrkajK4G9R+ieSAGcgrLeae2SeUTg2pr6w==",
+      "version": "12.23.12",
+      "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-12.23.12.tgz",
+      "integrity": "sha512-6e78rdVtnBvlEVgu6eFEAgG9v3wLnYEboM8I5O5EXvfKC8gxGQB8wXJdhkMy10iVcn05jl6CNw7/HTsTCfwcWg==",
      "license": "MIT",
      "dependencies": {
-        "motion-dom": "^11.18.1",
-        "motion-utils": "^11.18.1",
+        "motion-dom": "^12.23.12",
+        "motion-utils": "^12.23.6",
        "tslib": "^2.4.0"
      },
      "peerDependencies": {
@ -10006,9 +10147,9 @@
      "license": "MIT"
    },
    "node_modules/llama-stack-client": {
-      "version": "0.2.19",
-      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.19.tgz",
-      "integrity": "sha512-sDuAhUdEGlERZ3jlMUzPXcQTgMv/pGbDrPX0ifbE5S+gr7Q+7ohuQYrIXe+hXgIipFjq+y4b2c5laZ76tmAyEA==",
+      "version": "0.2.20",
+      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.20.tgz",
+      "integrity": "sha512-1vD5nizTX5JEW8TADxKgy/P1W8YZoPSpdnmfxbdYbWgpQ3BWtbvLS6jmDk7VwVA5fRC4895VfHsRDfS1liHarw==",
      "license": "MIT",
      "dependencies": {
        "@types/node": "^18.11.18",
@ -11184,18 +11325,18 @@
      }
    },
    "node_modules/motion-dom": {
-      "version": "11.18.1",
-      "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-11.18.1.tgz",
-      "integrity": "sha512-g76KvA001z+atjfxczdRtw/RXOM3OMSdd1f4DL77qCTF/+avrRJiawSG4yDibEQ215sr9kpinSlX2pCTJ9zbhw==",
+      "version": "12.23.12",
+      "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.23.12.tgz",
+      "integrity": "sha512-RcR4fvMCTESQBD/uKQe49D5RUeDOokkGRmz4ceaJKDBgHYtZtntC/s2vLvY38gqGaytinij/yi3hMcWVcEF5Kw==",
      "license": "MIT",
      "dependencies": {
-        "motion-utils": "^11.18.1"
+        "motion-utils": "^12.23.6"
      }
    },
    "node_modules/motion-utils": {
-      "version": "11.18.1",
-      "resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-11.18.1.tgz",
-      "integrity": "sha512-49Kt+HKjtbJKLtgO/LKj9Ld+6vw9BjH5d9sc40R/kVyH8GLAXgT42M2NnuPcJNuA3s9ZfZBUcwIgpmZWGEE+hA==",
+      "version": "12.23.6",
+      "resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-12.23.6.tgz",
+      "integrity": "sha512-eAWoPgr4eFEOFfg2WjIsMoqJTW6Z8MTUCgn/GZ3VRpClWBdnbjryiA3ZSNLyxCTmCQx4RmYX6jX1iWHbenUPNQ==",
      "license": "MIT"
    },
    "node_modules/ms": {
@ -12083,9 +12224,9 @@
      }
    },
    "node_modules/prettier": {
-      "version": "3.5.3",
-      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.5.3.tgz",
-      "integrity": "sha512-QQtaxnoDJeAkDvDKWCLiwIXkTgRhwYDEQCghU9Z6q03iyek/rxRh/2lC3HB7P8sWT2xC/y5JDctPLBIGzHKbhw==",
+      "version": "3.6.2",
+      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.6.2.tgz",
+      "integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==",
      "dev": true,
      "license": "MIT",
      "bin": {
@ -13986,9 +14127,9 @@
      }
    },
    "node_modules/undici-types": {
-      "version": "6.19.8",
-      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
-      "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
+      "version": "7.10.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz",
+      "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==",
      "license": "MIT"
    },
    "node_modules/unified": {
--- a/llama_stack/ui/package.json
+++ b/llama_stack/ui/package.json
@ -19,11 +19,11 @@
    "@radix-ui/react-select": "^2.2.5",
    "@radix-ui/react-separator": "^1.1.7",
    "@radix-ui/react-slot": "^1.2.3",
-    "@radix-ui/react-tooltip": "^1.2.6",
+    "@radix-ui/react-tooltip": "^1.2.8",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
-    "framer-motion": "^11.18.2",
-    "llama-stack-client": "^0.2.19",
+    "framer-motion": "^12.23.12",
+    "llama-stack-client": "^0.2.20",
    "lucide-react": "^0.510.0",
    "next": "15.3.3",
    "next-auth": "^4.24.11",
@ -44,16 +44,16 @@
    "@testing-library/jest-dom": "^6.8.0",
    "@testing-library/react": "^16.3.0",
    "@types/jest": "^29.5.14",
-    "@types/node": "^20",
+    "@types/node": "^24",
    "@types/react": "^19",
    "@types/react-dom": "^19",
    "eslint": "^9",
-    "eslint-config-next": "15.3.2",
+    "eslint-config-next": "15.5.2",
    "eslint-config-prettier": "^10.1.8",
    "eslint-plugin-prettier": "^5.5.4",
    "jest": "^29.7.0",
    "jest-environment-jsdom": "^29.7.0",
-    "prettier": "3.5.3",
+    "prettier": "3.6.2",
    "tailwindcss": "^4",
    "ts-node": "^10.9.2",
    "tw-animate-css": "^1.2.9",
--- a/pyproject.toml
+++ b/pyproject.toml
@ -7,7 +7,7 @@ required-version = ">=0.7.0"

 [project]
 name = "llama_stack"
-version = "0.2.19"
+version = "0.2.20"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
 description = "Llama Stack"
 readme = "README.md"
@ -31,9 +31,8 @@ dependencies = [
    "huggingface-hub>=0.34.0,<1.0",
    "jinja2>=3.1.6",
    "jsonschema",
-    "llama-stack-client>=0.2.19",
-    "llama-api-client>=0.1.2",
-    "openai>=1.99.6,<1.100.0",
+    "llama-stack-client>=0.2.20",
+    "openai>=1.99.6",
    "prompt-toolkit",
    "python-dotenv",
    "python-jose[cryptography]",
@ -56,7 +55,7 @@ dependencies = [
 ui = [
    "streamlit",
    "pandas",
-    "llama-stack-client>=0.2.19",
+    "llama-stack-client>=0.2.20",
    "streamlit-option-menu",
 ]

@ -84,6 +83,7 @@ unit = [
    "openai",
    "aiosqlite",
    "aiohttp",
+    "psycopg2-binary>=2.9.0",
    "pypdf",
    "mcp",
    "chardet",
@ -92,7 +92,7 @@ unit = [
    "sqlalchemy[asyncio]>=2.0.41",
    "blobfile",
    "faiss-cpu",
-    "pymilvus>=2.5.12",
+    "pymilvus>=2.6.1",
    "milvus-lite>=2.5.0",
    "litellm",
    "together",
@ -105,12 +105,13 @@ unit = [
 # separately. If you are using "uv" to execute your tests, you can use the "--group" flag to specify extra
 # dependencies.
 test = [
-    "openai",
+    "openai>=1.100.0",  # for expires_after support
    "aiosqlite",
    "aiohttp",
    "torch>=2.6.0",
    "torchvision>=0.21.0",
    "chardet",
+    "psycopg2-binary>=2.9.0",
    "pypdf",
    "mcp",
    "datasets",
@ -119,7 +120,7 @@ test = [
    "sqlalchemy",
    "sqlalchemy[asyncio]>=2.0.41",
    "requests",
-    "pymilvus>=2.5.12",
+    "pymilvus>=2.6.1",
    "milvus-lite>=2.5.0",
    "weaviate-client>=4.16.4",
 ]
@ -144,7 +145,7 @@ docs = [
 ]
 codegen = ["rich", "pydantic", "jinja2>=3.1.6"]
 benchmark = [
-    "locust>=2.37.14",
+    "locust>=2.39.1",
 ]

 [project.urls]
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@ -140,13 +140,6 @@ THIS_DIR=$(dirname "$0")
 ROOT_DIR="$THIS_DIR/.."
 cd $ROOT_DIR

-# Set recording directory
-if [[ "$RUN_VISION_TESTS" == "true" ]]; then
-    export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings/vision"
-else
-    export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings"
-fi
-
 # check if "llama" and "pytest" are available. this script does not use `uv run` given
 # it can be used in a pre-release environment where we have not been able to tell
 # uv about pre-release dependencies properly (yet).
@ -298,5 +291,12 @@ echo "=== System Resources After Tests ==="
 free -h 2>/dev/null || echo "free command not available"
 df -h

+# stop server
+if [[ "$STACK_CONFIG" == *"server:"* ]]; then
+    echo "Stopping Llama Stack Server..."
+    # lsof prints nothing (and exits non-zero) when no process uses the port.
+    # Guard against calling `kill` without arguments, which fails with a usage error.
+    server_pids=$(lsof -t -i :8321 || true)
+    if [[ -n "$server_pids" ]]; then
+        # Intentionally unquoted: one argument per PID.
+        kill $server_pids
+        echo "Llama Stack Server stopped"
+    else
+        echo "No Llama Stack Server process found on port 8321"
+    fi
+fi
+
 echo ""
 echo "=== Integration Tests Complete ==="
--- a/tests/README.md
+++ b/tests/README.md
@ -38,26 +38,15 @@ For running integration tests, you must provide a few things:
  - a distribution name (e.g., `starter`) or a path to a `run.yaml` file
  - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.

- Whether you are using replay or live mode for inference. This is specified with the LLAMA_STACK_TEST_INFERENCE_MODE environment variable. The default mode currently is "live" -- that is certainly surprising, but we will fix this soon.
-
 - Any API keys you need to use should be set in the environment, or can be passed in with the --env option.

 You can run the integration tests in replay mode with:
 ```bash
 # Run all tests with existing recordings
-LLAMA_STACK_TEST_INFERENCE_MODE=replay \
-  LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
  uv run --group test \
  pytest -sv tests/integration/ --stack-config=starter
 ```

-If you don't specify LLAMA_STACK_TEST_INFERENCE_MODE, by default it will be in "live" mode -- that is, it will make real API calls.
-
-```bash
-# Test against live APIs
-FIREWORKS_API_KEY=your_key pytest -sv tests/integration/inference --stack-config=starter
-```
-
 ### Re-recording tests

 #### Local Re-recording (Manual Setup Required)
@ -66,7 +55,6 @@ If you want to re-record tests locally, you can do so with:

 ```bash
 LLAMA_STACK_TEST_INFERENCE_MODE=record \
-  LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
  uv run --group test \
  pytest -sv tests/integration/ --stack-config=starter -k "<appropriate test name>"
 ```
--- a/tests/integration/README.md
+++ b/tests/integration/README.md
@ -98,29 +98,25 @@ pytest -s -v tests/integration/vector_io/ \

 The testing system supports three modes controlled by environment variables:

-### LIVE Mode (Default)
-Tests make real API calls:
+### REPLAY Mode (Default)
+Uses cached responses instead of making API calls:
 ```bash
-LLAMA_STACK_TEST_INFERENCE_MODE=live pytest tests/integration/
+pytest tests/integration/
 ```
-
 ### RECORD Mode
 Captures API interactions for later replay:
 ```bash
 LLAMA_STACK_TEST_INFERENCE_MODE=record \
-LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
 pytest tests/integration/inference/test_new_feature.py
 ```

-### REPLAY Mode
-Uses cached responses instead of making API calls:
+### LIVE Mode
+Tests make real API calls (responses are not recorded):
 ```bash
-LLAMA_STACK_TEST_INFERENCE_MODE=replay \
-LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
-pytest tests/integration/
+LLAMA_STACK_TEST_INFERENCE_MODE=live pytest tests/integration/
 ```

-Note that right now you must specify the recording directory. This is because different tests use different recording directories and we don't (yet) have a fool-proof way to map a test to a recording directory. We are working on this.
+By default, the recording directory is `tests/integration/recordings`. You can override this by setting the `LLAMA_STACK_TEST_RECORDING_DIR` environment variable.

 ## Managing Recordings

@ -146,7 +142,6 @@ See the [main testing guide](../README.md#remote-re-recording-recommended) for f
 ```bash
 # Re-record specific tests
 LLAMA_STACK_TEST_INFERENCE_MODE=record \
-LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
 pytest -s -v --stack-config=server:starter tests/integration/inference/test_modified.py
 ```

--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@ -30,6 +30,8 @@ def pytest_runtest_makereport(item, call):
 def pytest_sessionstart(session):
    # stop macOS from complaining about duplicate OpenMP libraries
    os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
+    # fall back to replay mode only when the caller has not chosen a mode
+    os.environ.setdefault("LLAMA_STACK_TEST_INFERENCE_MODE", "replay")


 def pytest_runtest_teardown(item):
--- a/tests/integration/files/test_files.py
+++ b/tests/integration/files/test_files.py
@ -8,6 +8,7 @@ from io import BytesIO
 from unittest.mock import patch

 import pytest
+import requests

 from llama_stack.core.datatypes import User

@ -79,6 +80,88 @@ def test_openai_client_basic_operations(openai_client):
                pass  # ignore 404


+@pytest.mark.xfail(reason="expires_after not available on all providers")
+def test_expires_after(openai_client):
+    """Upload a file with expires_after, then check expires_at, listing and retrieval."""
+    client = openai_client
+
+    uploaded_file = None
+    try:
+        with BytesIO(b"expires_after test") as file_buffer:
+            file_buffer.name = "expires_after.txt"
+            uploaded_file = client.files.create(
+                file=file_buffer,
+                purpose="assistants",
+                expires_after={"anchor": "created_at", "seconds": 4545},
+            )
+
+        assert uploaded_file.expires_at is not None
+        assert uploaded_file.expires_at == uploaded_file.created_at + 4545
+
+        listed = client.files.list()
+        ids = [f.id for f in listed.data]
+        assert uploaded_file.id in ids
+
+        retrieved = client.files.retrieve(uploaded_file.id)
+        assert retrieved.id == uploaded_file.id
+
+    finally:
+        if uploaded_file is not None:
+            try:
+                client.files.delete(uploaded_file.id)
+            except Exception:
+                pass  # best-effort cleanup; a failed delete must not mask the test outcome
+
+
+@pytest.mark.xfail(reason="expires_after not available on all providers")
+def test_expires_after_requests(openai_client):
+    """Upload a file using requests multipart/form-data and bracketed expires_after fields.
+
+    This ensures clients that send form fields like `expires_after[anchor]` and
+    `expires_after[seconds]` are handled by the server.
+    """
+    base_url = f"{openai_client.base_url}files"
+
+    uploaded_id = None
+    try:
+        files = {"file": ("expires_after_with_requests.txt", BytesIO(b"expires_after via requests"))}
+        data = {
+            "purpose": "assistants",
+            "expires_after[anchor]": "created_at",
+            "expires_after[seconds]": "4545",
+        }
+
+        with requests.Session() as session:  # close the session's connections when done
+            request = requests.Request("POST", base_url, files=files, data=data)
+            prepared = session.prepare_request(request)
+            resp = session.send(prepared, timeout=30)
+        resp.raise_for_status()
+        result = resp.json()
+
+        assert result.get("id", "").startswith("file-")
+        uploaded_id = result["id"]
+        assert result.get("created_at") is not None
+        assert result.get("expires_at") == result["created_at"] + 4545
+
+        list_resp = requests.get(base_url, timeout=30)
+        list_resp.raise_for_status()
+        listed = list_resp.json()
+        ids = [f["id"] for f in listed.get("data", [])]
+        assert uploaded_id in ids
+
+        retrieve_resp = requests.get(f"{base_url}/{uploaded_id}", timeout=30)
+        retrieve_resp.raise_for_status()
+        retrieved = retrieve_resp.json()
+        assert retrieved["id"] == uploaded_id
+
+    finally:
+        if uploaded_id:
+            try:
+                requests.delete(f"{base_url}/{uploaded_id}", timeout=30)
+            except Exception:
+                pass  # best-effort cleanup; a failed delete must not mask the test outcome
+
+
@pytest.mark.xfail(message="User isolation broken for current providers, must be fixed.")
@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
 def test_files_authentication_isolation(mock_get_authenticated_user, llama_stack_client):
--- a/tests/integration/recordings/responses/00ba04f74a96.json
+++ b/tests/integration/recordings/responses/00ba04f74a96.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama-guard3:1b",
-        "created_at": "2025-08-01T23:12:53.860911Z",
+        "created_at": "2025-09-03T17:37:35.23084Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 249137667,
-        "load_duration": 152509542,
+        "total_duration": 195981375,
+        "load_duration": 110522917,
        "prompt_eval_count": 216,
-        "prompt_eval_duration": 71000000,
+        "prompt_eval_duration": 72393958,
        "eval_count": 2,
-        "eval_duration": 24000000,
+        "eval_duration": 11843000,
        "response": "safe",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/04172112ffbb.json
+++ b/tests/integration/recordings/responses/04172112ffbb.json
@ -21,7 +21,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:18.033900164Z",
+          "created_at": "2025-09-03T17:41:43.950283Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -39,7 +39,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:18.213371151Z",
+          "created_at": "2025-09-03T17:41:43.991122Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -57,7 +57,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:18.387513976Z",
+          "created_at": "2025-09-03T17:41:44.031378Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -75,7 +75,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:18.564344287Z",
+          "created_at": "2025-09-03T17:41:44.073098Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -93,7 +93,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:18.746579415Z",
+          "created_at": "2025-09-03T17:41:44.115961Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -111,7 +111,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:18.923276047Z",
+          "created_at": "2025-09-03T17:41:44.156517Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -129,7 +129,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:19.099961963Z",
+          "created_at": "2025-09-03T17:41:44.197079Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -147,7 +147,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:19.275621884Z",
+          "created_at": "2025-09-03T17:41:44.237565Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -165,7 +165,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:19.452204196Z",
+          "created_at": "2025-09-03T17:41:44.277755Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -183,7 +183,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:19.626937514Z",
+          "created_at": "2025-09-03T17:41:44.318476Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -201,7 +201,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:19.805566767Z",
+          "created_at": "2025-09-03T17:41:44.358628Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -219,7 +219,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:19.985987477Z",
+          "created_at": "2025-09-03T17:41:44.398984Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -237,7 +237,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:20.166458601Z",
+          "created_at": "2025-09-03T17:41:44.439232Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -255,7 +255,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:20.343346795Z",
+          "created_at": "2025-09-03T17:41:44.479478Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -273,7 +273,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:20.525008091Z",
+          "created_at": "2025-09-03T17:41:44.520202Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -291,7 +291,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:20.709087695Z",
+          "created_at": "2025-09-03T17:41:44.560517Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -309,7 +309,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:20.887074305Z",
+          "created_at": "2025-09-03T17:41:44.601592Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -327,15 +327,15 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-07-31T17:59:21.065244925Z",
+          "created_at": "2025-09-03T17:41:44.642064Z",
          "done": true,
          "done_reason": "stop",
-          "total_duration": 4373531496,
-          "load_duration": 44438132,
+          "total_duration": 887142667,
+          "load_duration": 119331417,
          "prompt_eval_count": 56,
-          "prompt_eval_duration": 1296273199,
+          "prompt_eval_duration": 74294709,
          "eval_count": 18,
-          "eval_duration": 3032321735,
+          "eval_duration": 692842791,
          "response": "",
          "thinking": null,
          "context": null
--- a/tests/integration/recordings/responses/0b27fd737699.json
+++ b/tests/integration/recordings/responses/0b27fd737699.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama-guard3:1b",
-        "created_at": "2025-08-01T23:13:57.556416Z",
+        "created_at": "2025-09-03T17:37:47.461886Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 432363250,
-        "load_duration": 159296417,
+        "total_duration": 338927833,
+        "load_duration": 100895125,
        "prompt_eval_count": 223,
-        "prompt_eval_duration": 257000000,
+        "prompt_eval_duration": 221583042,
        "eval_count": 2,
-        "eval_duration": 14000000,
+        "eval_duration": 12341416,
        "response": "safe",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/0b3f2e4754ff.json
+++ b/tests/integration/recordings/responses/0b3f2e4754ff.json
@ -24,7 +24,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-29",
+          "id": "chatcmpl-414",
          "choices": [
            {
              "delta": {
@ -39,7 +39,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090031,
+          "created": 1756921333,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -50,7 +50,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-29",
+          "id": "chatcmpl-414",
          "choices": [
            {
              "delta": {
@ -65,7 +65,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090031,
+          "created": 1756921333,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -76,7 +76,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-29",
+          "id": "chatcmpl-414",
          "choices": [
            {
              "delta": {
@ -91,7 +91,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090031,
+          "created": 1756921333,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -102,7 +102,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-29",
+          "id": "chatcmpl-414",
          "choices": [
            {
              "delta": {
@ -117,7 +117,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090031,
+          "created": 1756921333,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -128,7 +128,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-29",
+          "id": "chatcmpl-414",
          "choices": [
            {
              "delta": {
@ -143,7 +143,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090031,
+          "created": 1756921334,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -154,7 +154,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-29",
+          "id": "chatcmpl-414",
          "choices": [
            {
              "delta": {
@ -169,7 +169,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090031,
+          "created": 1756921334,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -180,7 +180,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-29",
+          "id": "chatcmpl-414",
          "choices": [
            {
              "delta": {
@ -195,7 +195,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090031,
+          "created": 1756921334,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -206,7 +206,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-29",
+          "id": "chatcmpl-414",
          "choices": [
            {
              "delta": {
@ -221,7 +221,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090031,
+          "created": 1756921334,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
--- a/tests/integration/recordings/responses/0e8f2b001dd9.json
+++ b/tests/integration/recordings/responses/0e8f2b001dd9.json
@ -1,7 +1,7 @@
 {
  "request": {
    "method": "POST",
-    "url": "http://localhost:11434/v1/v1/chat/completions",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
@ -20,14 +20,14 @@
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
-        "id": "chatcmpl-368",
+        "id": "chatcmpl-161",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
-              "content": "Saturn is known for its extensive ring system.",
+              "content": "The answer is Saturn.",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
@ -37,15 +37,15 @@
            }
          }
        ],
-        "created": 1754081853,
+        "created": 1756921364,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
-          "completion_tokens": 11,
+          "completion_tokens": 6,
          "prompt_tokens": 39,
-          "total_tokens": 50,
+          "total_tokens": 45,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
--- a/tests/integration/recordings/responses/10eea8c15ddc.json
+++ b/tests/integration/recordings/responses/10eea8c15ddc.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama-guard3:1b",
-        "created_at": "2025-08-01T23:12:51.682357Z",
+        "created_at": "2025-09-03T17:37:33.473237Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 238161000,
-        "load_duration": 72494750,
+        "total_duration": 279025042,
+        "load_duration": 162673250,
        "prompt_eval_count": 212,
-        "prompt_eval_duration": 87000000,
+        "prompt_eval_duration": 73595834,
        "eval_count": 5,
-        "eval_duration": 74000000,
+        "eval_duration": 41950291,
        "response": "unsafe\nS8",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/140187e305dc.json
+++ b/tests/integration/recordings/responses/140187e305dc.json
@ -1,7 +1,7 @@
 {
  "request": {
    "method": "POST",
-    "url": "http://localhost:11434/v1/v1/chat/completions",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
@ -20,14 +20,14 @@
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
-        "id": "chatcmpl-876",
+        "id": "chatcmpl-974",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
-              "content": "I'm afraid I don't have a built-in ability to directly interface with or \"test\" OpenAI models, including the original GPT-1 model. However, I can explain how you might approach this task:\n\nThe OpenAI GPT-1 is a large transformer-based language model that was trained on a massive dataset of text and achieved state-of-the-art results in various natural language processing tasks.\n\nTo test or evaluate the performance of a model like GPT-1, you would typically follow these steps:\n\n1. **Get access to the OpenAI API**: The OpenAI API provides a way for developers to interact with the GPT-1 model programmatically. You can sign up for an API key on the OpenAI website.\n2. **Choose a testing platform or environment**: You'll need a compute platform that supports the necessary algorithms and data structures to run inference on the GPT-1 model. Some popular options include AWS, Google Cloud, or Azure Compute Virtual Machines.\n3. **Prepare your test input data**: This will involve creating text inputs in the format expected by the OpenAI API (i.e., a JSON object containing the text to be processed).\n4. **Use the OpenAI Python library or SDK**: The OpenAI Python library provides an easy-to-use interface for interacting with the GPT-1 model through the API.\n\nHere's some example code that demonstrates how you might use the OpenAI Flask API to test a single input:\n\n```python\nfrom flask import Flask, request, jsonify\nimport json\n\napp = Flask(__name__)\n\n@ app . route ( '/ /gpt-en ', ' Text ', methods = ['POST'])\ndef gpt_en () -> Json :\n    data = request . 
get_json ()\n    if not data or \"message\" in ( data ):\n        return None , 400 , { ' error' : \"Input must be a text string.\" }\n    response = []\n    while True:\n        message = \"\"\n        for token in data [\"input\"]:\n            response_text = f\"{data['prompt']} {token}\"\n            data[\"input\"] = [response_text]\n            new_response = gpt_en()(data)\n            if all([not item or not isinstance(item, dict) for item in new_response]):\n             break\n\n        message = json . dumps ({}\"text\": response_text})\n        response.append(message)\n\n    return jsonify ({\"output\": response}), 200 , {}\n\nif __name__ == \"__main__\":\n   app.run(debug=True)\n```\n\n5. **Evaluate the output**: Once you have processed your test input data using the GPT-1 model, you can evaluate the accuracy of the generated responses.\n\nKeep in mind that this is just a basic example to illustrate how you might approach testing the OpenAI GPT-1 model.",
+              "content": "I'm happy to help you test the OpenAI API, however I can not access the API.\n\nInstead why don't we follow these steps:\n\n*   Check documentation\n*   Contact support\n*   Reach out to their community forum. \n\nLet me know if I can be of any additional assistance",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
@ -37,15 +37,15 @@
            }
          }
        ],
-        "created": 1754510050,
+        "created": 1756921202,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
-          "completion_tokens": 567,
+          "completion_tokens": 61,
          "prompt_tokens": 31,
-          "total_tokens": 598,
+          "total_tokens": 92,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
--- a/tests/integration/recordings/responses/17253d7cc667.json
+++ b/tests/integration/recordings/responses/17253d7cc667.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama-guard3:1b",
-        "created_at": "2025-08-01T23:12:52.919624Z",
+        "created_at": "2025-09-03T17:37:34.308033Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 201956834,
-        "load_duration": 105132584,
+        "total_duration": 200296000,
+        "load_duration": 115974708,
        "prompt_eval_count": 212,
-        "prompt_eval_duration": 75000000,
+        "prompt_eval_duration": 72173459,
        "eval_count": 2,
-        "eval_duration": 20000000,
+        "eval_duration": 11536750,
        "response": "safe",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/173ecb3aab28.json
+++ b/tests/integration/recordings/responses/173ecb3aab28.json
@ -40,7 +40,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-457",
+          "id": "chatcmpl-921",
          "choices": [
            {
              "delta": {
@ -55,7 +55,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090032,
+          "created": 1756920971,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -66,7 +66,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-457",
+          "id": "chatcmpl-921",
          "choices": [
            {
              "delta": {
@ -81,7 +81,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090032,
+          "created": 1756920971,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -92,7 +92,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-457",
+          "id": "chatcmpl-921",
          "choices": [
            {
              "delta": {
@ -107,7 +107,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090032,
+          "created": 1756920971,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -118,7 +118,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-457",
+          "id": "chatcmpl-921",
          "choices": [
            {
              "delta": {
@ -133,7 +133,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090032,
+          "created": 1756920971,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -144,7 +144,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-457",
+          "id": "chatcmpl-921",
          "choices": [
            {
              "delta": {
@ -159,7 +159,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090032,
+          "created": 1756920971,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -170,7 +170,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-457",
+          "id": "chatcmpl-921",
          "choices": [
            {
              "delta": {
@ -185,7 +185,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090032,
+          "created": 1756920971,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -196,7 +196,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-457",
+          "id": "chatcmpl-921",
          "choices": [
            {
              "delta": {
@ -211,7 +211,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090032,
+          "created": 1756920971,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -222,7 +222,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-457",
+          "id": "chatcmpl-921",
          "choices": [
            {
              "delta": {
@ -237,7 +237,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754090032,
+          "created": 1756920971,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
--- a/tests/integration/recordings/responses/174458ad71b2.json
+++ b/tests/integration/recordings/responses/174458ad71b2.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama-guard3:1b",
-        "created_at": "2025-08-01T23:12:53.580806Z",
+        "created_at": "2025-09-03T17:37:34.994704Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 205732750,
-        "load_duration": 98967000,
+        "total_duration": 339570875,
+        "load_duration": 262794125,
        "prompt_eval_count": 213,
-        "prompt_eval_duration": 86000000,
+        "prompt_eval_duration": 64061000,
        "eval_count": 2,
-        "eval_duration": 18000000,
+        "eval_duration": 11839042,
        "response": "safe",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/178016edef0e.json
+++ b/tests/integration/recordings/responses/178016edef0e.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama-guard3:1b",
-        "created_at": "2025-08-01T23:12:52.354566Z",
+        "created_at": "2025-09-03T17:37:33.769233Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 605192500,
-        "load_duration": 457087166,
+        "total_duration": 253836584,
+        "load_duration": 138624959,
        "prompt_eval_count": 210,
-        "prompt_eval_duration": 63000000,
+        "prompt_eval_duration": 69496125,
        "eval_count": 5,
-        "eval_duration": 84000000,
+        "eval_duration": 45062833,
        "response": "unsafe\nS12",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/197228e26971.json
+++ b/tests/integration/recordings/responses/197228e26971.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama-guard3:1b",
-        "created_at": "2025-08-01T23:12:52.686478Z",
+        "created_at": "2025-09-03T17:37:34.074233Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 304136208,
-        "load_duration": 155977000,
+        "total_duration": 270746375,
+        "load_duration": 156423042,
        "prompt_eval_count": 213,
-        "prompt_eval_duration": 71000000,
+        "prompt_eval_duration": 70338083,
        "eval_count": 5,
-        "eval_duration": 76000000,
+        "eval_duration": 43379167,
        "response": "unsafe\nS2",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/198ef7208389.json
+++ b/tests/integration/recordings/responses/198ef7208389.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama-guard3:1b",
-        "created_at": "2025-08-01T23:12:51.186501Z",
+        "created_at": "2025-09-03T17:37:32.84197Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 3146184459,
-        "load_duration": 2533467917,
+        "total_duration": 21572898667,
+        "load_duration": 21155275042,
        "prompt_eval_count": 212,
-        "prompt_eval_duration": 526000000,
+        "prompt_eval_duration": 371898125,
        "eval_count": 5,
-        "eval_duration": 83000000,
+        "eval_duration": 43290458,
        "response": "unsafe\nS1",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/1adfaa0e062e.json
+++ b/tests/integration/recordings/responses/1adfaa0e062e.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama-guard3:1b",
-        "created_at": "2025-08-01T23:12:53.332041Z",
+        "created_at": "2025-09-03T17:37:34.607413Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 365895333,
-        "load_duration": 257825208,
+        "total_duration": 267812042,
+        "load_duration": 181570000,
        "prompt_eval_count": 213,
-        "prompt_eval_duration": 78000000,
+        "prompt_eval_duration": 73947375,
        "eval_count": 2,
-        "eval_duration": 28000000,
+        "eval_duration": 11708000,
        "response": "safe",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/1b8394f90636.json
+++ b/tests/integration/recordings/responses/1b8394f90636.json
@ -22,15 +22,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-08-04T22:55:05.685988Z",
+        "created_at": "2025-09-03T17:36:13.821929Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 14128980625,
-        "load_duration": 7220159208,
+        "total_duration": 1907912167,
+        "load_duration": 90979292,
        "prompt_eval_count": 18,
-        "prompt_eval_duration": 4658000000,
+        "prompt_eval_duration": 77350291,
        "eval_count": 43,
-        "eval_duration": 2224000000,
+        "eval_duration": 1738568334,
        "response": " _______.\n\nThe best answer is blue. The traditional nursery rhyme goes like this:\n\nRoses are red,\nViolets are blue,\nSugar is sweet,\nAnd so are you! (Or something similar.)",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/1b92be674e2a.json
+++ b/tests/integration/recordings/responses/1b92be674e2a.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-31T17:50:06.140190726Z",
+        "created_at": "2025-09-03T17:39:38.236797Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 5213341378,
-        "load_duration": 43943569,
+        "total_duration": 1296281500,
+        "load_duration": 283393917,
        "prompt_eval_count": 23,
-        "prompt_eval_duration": 1049424427,
+        "prompt_eval_duration": 75453042,
        "eval_count": 24,
-        "eval_duration": 4119422888,
+        "eval_duration": 936860125,
        "response": "Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004.",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/1e11c2b20ff8.json
+++ b/tests/integration/recordings/responses/1e11c2b20ff8.json
@ -0,0 +1,422 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
+    "headers": {},
+    "body": {
+      "model": "all-minilm:l6-v2",
+      "input": [
+        "How do systems learn automatically?"
+      ],
+      "encoding_format": "float"
+    },
+    "endpoint": "/v1/embeddings",
+    "model": "all-minilm:l6-v2"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
+      "__data__": {
+        "data": [
+          {
+            "embedding": [
+              0.042460807,
+              -0.06189971,
+              -0.0784711,
+              0.0064329687,
+              0.03129365,
+              0.00807445,
+              0.05801836,
+              0.025447326,
+              0.016402787,
+              0.045995634,
+              -0.028924342,
+              0.04451832,
+              0.05686613,
+              -0.015340794,
+              -0.07020505,
+              -0.057178136,
+              -0.07683263,
+              0.006748679,
+              0.0043323045,
+              -0.123651944,
+              0.0031534543,
+              -0.03258051,
+              -0.02936216,
+              0.024140852,
+              -0.028559243,
+              0.10224467,
+              0.0021632623,
+              -0.006975691,
+              0.025292527,
+              -0.055500276,
+              0.031231727,
+              -0.0070274337,
+              0.08430815,
+              -0.028431177,
+              -0.083029,
+              0.009555893,
+              -0.020029299,
+              -0.00243229,
+              -0.00768719,
+              -0.023077851,
+              -0.09293533,
+              -0.042625993,
+              -0.020000124,
+              0.008240663,
+              0.060970567,
+              0.050315727,
+              -0.0510085,
+              -0.008543903,
+              -0.030227834,
+              -0.03582846,
+              -0.17836656,
+              -0.047279052,
+              0.033892106,
+              0.031623542,
+              -0.008832113,
+              0.10480918,
+              0.033559043,
+              0.090348184,
+              -0.015757555,
+              -0.0125672715,
+              -0.084686965,
+              -0.114781834,
+              -0.13755985,
+              0.021652374,
+              0.047834594,
+              0.043243896,
+              0.008659893,
+              0.038724966,
+              0.046716973,
+              -0.077413626,
+              -0.04887495,
+              0.031287406,
+              0.022356613,
+              0.00043283988,
+              0.052321073,
+              -0.012254071,
+              -0.035172574,
+              -0.00825216,
+              -0.008866574,
+              -0.034267236,
+              -0.04576201,
+              0.002467568,
+              -0.040877618,
+              0.08047682,
+              0.09472728,
+              0.0413438,
+              0.0057974122,
+              0.044982508,
+              0.025369909,
+              0.006618073,
+              0.010467276,
+              -0.07960384,
+              -0.03108485,
+              -0.03528749,
+              0.01831391,
+              0.053473305,
+              0.06568304,
+              -0.07259002,
+              0.02523736,
+              0.10520362,
+              0.035732146,
+              0.028157586,
+              0.011687256,
+              0.044207197,
+              0.012604437,
+              0.0018819098,
+              0.03926183,
+              0.043135095,
+              0.09784739,
+              -0.08801336,
+              -0.06060836,
+              0.02681984,
+              0.0041358666,
+              0.033492945,
+              0.011799116,
+              0.009551661,
+              -0.0095491735,
+              -0.021212189,
+              -0.008917248,
+              0.029352615,
+              -0.012693442,
+              -0.019269384,
+              0.009901157,
+              -0.00812101,
+              0.018603146,
+              -0.0007501193,
+              -0.056115113,
+              -3.8018077e-33,
+              0.020848714,
+              0.0047160466,
+              0.019726405,
+              0.06024251,
+              -0.0685974,
+              -0.07497267,
+              0.007997452,
+              -0.047339544,
+              0.057801835,
+              0.049544968,
+              0.01878086,
+              0.03274472,
+              0.017663997,
+              0.07483022,
+              0.02496901,
+              -0.011843339,
+              -0.11212756,
+              0.0070379525,
+              0.028099466,
+              -0.01746246,
+              0.08173482,
+              -0.007920462,
+              0.032095373,
+              -0.12300146,
+              0.033773854,
+              0.025873141,
+              -0.0045020077,
+              0.079493225,
+              0.0040725255,
+              0.03305898,
+              0.008061117,
+              0.0134422695,
+              -0.03292251,
+              0.031554114,
+              0.04013794,
+              0.0014983519,
+              0.030762345,
+              0.029481992,
+              0.041350223,
+              -0.047438618,
+              0.03944708,
+              -0.07526981,
+              0.037927423,
+              -0.026016014,
+              0.016933467,
+              0.0136799775,
+              0.0071263947,
+              -0.05386736,
+              -0.07443268,
+              -0.006070775,
+              0.024427462,
+              -0.039844982,
+              -0.020661902,
+              -0.033354662,
+              0.009005565,
+              0.12111172,
+              -0.028260944,
+              -0.036192853,
+              -0.021332363,
+              0.05333571,
+              0.05161245,
+              -0.01204843,
+              0.035563566,
+              0.05408247,
+              0.060722187,
+              0.07159865,
+              0.04299143,
+              0.008544481,
+              0.07421879,
+              0.00841512,
+              -0.036342908,
+              -0.008549791,
+              -0.08816386,
+              -0.049075164,
+              0.00029373015,
+              -0.05127952,
+              0.03586739,
+              -0.030380003,
+              -0.012642127,
+              0.018771531,
+              0.01711824,
+              -0.06644723,
+              0.023793438,
+              0.0010271219,
+              -0.01939443,
+              -0.053452212,
+              -0.017060323,
+              -0.062207118,
+              -0.05962535,
+              -0.012172617,
+              -0.013190802,
+              -0.037036054,
+              0.00082622556,
+              0.098088354,
+              0.024690514,
+              2.1767905e-33,
+              -0.010088812,
+              -0.016811697,
+              -0.042140447,
+              0.08837209,
+              -0.028899776,
+              -0.0048947735,
+              -0.082139015,
+              0.029238816,
+              -0.043079354,
+              -0.014153092,
+              -0.028387645,
+              0.025998218,
+              -0.017625,
+              0.046511114,
+              -0.005768211,
+              0.030010609,
+              0.011375536,
+              0.017426634,
+              0.055062976,
+              0.032230247,
+              -0.07995765,
+              0.032486655,
+              -0.060016844,
+              -0.011561194,
+              0.010211269,
+              0.046528235,
+              0.001191399,
+              0.0786961,
+              -0.0446158,
+              0.032789085,
+              0.0023115936,
+              -0.03886269,
+              -0.017663589,
+              0.07913024,
+              -0.004583343,
+              0.043521065,
+              -0.031589273,
+              0.008867868,
+              -0.05013296,
+              0.068929516,
+              0.043675046,
+              0.019968731,
+              -0.08471742,
+              -0.046864275,
+              -0.0068198936,
+              -0.026138468,
+              -0.05107216,
+              0.054374695,
+              0.03069186,
+              -0.010925094,
+              0.04721093,
+              -0.017387696,
+              -0.020754937,
+              -0.081763394,
+              -0.027709637,
+              0.035980806,
+              0.05396534,
+              0.044874854,
+              0.059699643,
+              0.041227758,
+              -0.06664364,
+              -0.09201654,
+              0.008915574,
+              0.025849758,
+              -0.038651932,
+              -0.0044070315,
+              -0.052066546,
+              0.027435115,
+              0.012089562,
+              0.048306923,
+              0.059854515,
+              0.097325735,
+              -0.053612895,
+              -0.07639326,
+              0.015773866,
+              -0.0444848,
+              -0.13214406,
+              -0.0702488,
+              -0.10134438,
+              -0.11905995,
+              -0.027714504,
+              0.006891868,
+              -0.0053650527,
+              0.054135524,
+              -0.111159205,
+              0.07835098,
+              0.03506018,
+              0.016036613,
+              0.021490784,
+              -0.061526407,
+              0.007425222,
+              0.04833579,
+              -0.01361202,
+              0.012450488,
+              -0.12729599,
+              -1.4009424e-08,
+              -0.040908325,
+              -0.01596458,
+              0.060048707,
+              0.03804525,
+              0.0663794,
+              0.04727275,
+              -0.016112225,
+              0.09687414,
+              -0.04424251,
+              -0.028799534,
+              -0.01294642,
+              0.013026413,
+              0.022404836,
+              0.04713173,
+              0.06402557,
+              0.12130648,
+              0.06062839,
+              0.10218965,
+              -0.0757528,
+              -0.023806982,
+              0.12489501,
+              -0.045460615,
+              0.09545599,
+              0.021262301,
+              0.03731495,
+              -0.075220875,
+              -0.0026194793,
+              0.0472452,
+              0.048499025,
+              0.12358729,
+              0.017998053,
+              0.013811017,
+              -0.035893846,
+              -0.051789004,
+              0.06182457,
+              0.05160056,
+              0.008895317,
+              -0.12500942,
+              0.016453298,
+              -0.08590811,
+              -0.071096726,
+              0.06987216,
+              -0.036072273,
+              -0.0053715096,
+              -0.048762616,
+              0.00081640907,
+              -0.021502526,
+              -0.061078615,
+              0.002485032,
+              -0.032720752,
+              0.045743283,
+              0.038934175,
+              -0.024666062,
+              0.025897244,
+              0.10301431,
+              -0.013001504,
+              0.04783332,
+              -0.07114252,
+              0.046031926,
+              0.080549754,
+              -0.10302451,
+              0.08449227,
+              0.028010191,
+              -0.03697792
+            ],
+            "index": 0,
+            "object": "embedding"
+          }
+        ],
+        "model": "all-minilm:l6-v2",
+        "object": "list",
+        "usage": {
+          "prompt_tokens": 6,
+          "total_tokens": 6
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/211b1562d4e6.json
+++ b/tests/integration/recordings/responses/211b1562d4e6.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-08-04T22:55:11.15982Z",
+        "created_at": "2025-09-03T17:36:17.894986Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 498612042,
-        "load_duration": 71411834,
+        "total_duration": 363397458,
+        "load_duration": 86692791,
        "prompt_eval_count": 23,
-        "prompt_eval_duration": 102000000,
+        "prompt_eval_duration": 68658541,
        "eval_count": 6,
-        "eval_duration": 323000000,
+        "eval_duration": 207389084,
        "response": "Humans live on Earth.",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/23506e73bb9e.json
+++ b/tests/integration/recordings/responses/23506e73bb9e.json
@ -0,0 +1,422 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
+    "headers": {},
+    "body": {
+      "model": "all-minilm:l6-v2",
+      "input": [
+        "This is a test file 1"
+      ],
+      "encoding_format": "float"
+    },
+    "endpoint": "/v1/embeddings",
+    "model": "all-minilm:l6-v2"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
+      "__data__": {
+        "data": [
+          {
+            "embedding": [
+              -0.055990793,
+              0.076004684,
+              -0.09247725,
+              0.014340361,
+              0.058780864,
+              -0.032434482,
+              0.020954052,
+              0.028818125,
+              -0.06591213,
+              0.013541593,
+              0.12999941,
+              0.004603084,
+              -0.0069239275,
+              -0.055457443,
+              -0.047553156,
+              -0.029139794,
+              -0.12236376,
+              -0.05360872,
+              -0.014706594,
+              0.05984688,
+              0.034442738,
+              0.02076038,
+              -0.048697792,
+              0.0135388365,
+              0.058592733,
+              -0.003076384,
+              -0.031565297,
+              0.082541116,
+              -0.031259205,
+              -0.12057633,
+              0.038319625,
+              0.06574785,
+              0.06415721,
+              0.038382582,
+              0.12570712,
+              0.03108174,
+              0.10821103,
+              -0.0019794356,
+              -0.024704305,
+              0.028765837,
+              0.01268161,
+              -0.039844505,
+              0.043253522,
+              -0.015898596,
+              -0.0135526005,
+              -0.0050831717,
+              -0.007911988,
+              0.039783813,
+              0.0036548872,
+              -0.033632487,
+              -0.058547974,
+              0.0048877494,
+              -0.089586094,
+              -0.010457663,
+              0.059202507,
+              -0.020414542,
+              0.014278556,
+              0.013986488,
+              -0.0046022516,
+              0.0383391,
+              0.0048145773,
+              0.029772853,
+              -0.020863408,
+              0.018640704,
+              0.12422993,
+              -0.023236223,
+              -0.040323637,
+              -0.023598222,
+              -0.007448043,
+              -0.09083128,
+              -0.16859712,
+              0.01012451,
+              -0.035808884,
+              0.010595173,
+              -0.02050494,
+              0.0020821376,
+              -0.10925222,
+              0.00793264,
+              0.048889533,
+              -0.11391199,
+              -0.06072707,
+              -0.13435508,
+              0.0063265716,
+              -0.008838073,
+              -0.03153269,
+              0.099169336,
+              0.055310693,
+              0.0068571265,
+              -0.023463152,
+              -0.0031599961,
+              0.036782328,
+              0.014336826,
+              0.022220163,
+              0.047114056,
+              0.007079763,
+              0.06806425,
+              0.01851431,
+              0.040882625,
+              0.055058856,
+              0.09488346,
+              -0.015833577,
+              -7.924328e-05,
+              0.010821554,
+              0.09177704,
+              -0.07464829,
+              -0.06471165,
+              0.07013805,
+              -0.04499751,
+              0.057702336,
+              -0.0260911,
+              0.006323043,
+              -0.09500501,
+              -0.010549514,
+              -0.07887475,
+              0.039744847,
+              -0.04154404,
+              -0.055268157,
+              0.07540271,
+              -0.04667509,
+              0.036143072,
+              0.080297194,
+              -0.036381353,
+              -0.03477274,
+              0.01701203,
+              -0.047007203,
+              -0.06519774,
+              0.062141683,
+              -4.222482e-33,
+              -0.0017580023,
+              -0.09383388,
+              -0.02982657,
+              0.1257841,
+              0.03802007,
+              -0.03654342,
+              0.0060920226,
+              0.05906885,
+              -0.11074452,
+              0.005664566,
+              -0.0259852,
+              -0.074819505,
+              0.008342821,
+              0.027451068,
+              -0.05248069,
+              0.02401768,
+              -0.004380289,
+              0.039321493,
+              -0.04213744,
+              -0.027290314,
+              0.054677974,
+              0.02707243,
+              -0.03329442,
+              -0.060589895,
+              -0.050737355,
+              0.017969057,
+              -0.0035060972,
+              -0.04666249,
+              0.073946096,
+              0.01333894,
+              -0.0033873583,
+              -0.046544433,
+              -0.060105033,
+              0.03406923,
+              0.001542676,
+              0.039177947,
+              0.03989323,
+              -0.012346489,
+              -0.030511485,
+              -0.0019157606,
+              -0.014608986,
+              -0.012997742,
+              0.019522104,
+              -0.022349002,
+              0.074362256,
+              -0.053366993,
+              -0.023993475,
+              0.029225096,
+              0.027534606,
+              0.015111057,
+              -0.020442221,
+              0.043327376,
+              0.019660354,
+              0.017330697,
+              -0.0035011724,
+              0.019482937,
+              -0.0003428041,
+              0.0004143988,
+              -0.005117252,
+              0.06624799,
+              0.027922852,
+              0.041020587,
+              -0.067166425,
+              0.028737254,
+              -0.03478325,
+              -0.055551115,
+              -0.032713737,
+              -0.08099247,
+              0.09216284,
+              0.06395264,
+              -0.049168136,
+              -0.039908994,
+              0.036915958,
+              -0.001602359,
+              0.00033041168,
+              -0.026015632,
+              -0.005999889,
+              0.05474541,
+              -0.09568287,
+              -0.05186289,
+              -0.048838183,
+              -0.08639551,
+              -0.034023147,
+              -0.033257127,
+              -0.05651867,
+              -0.051131375,
+              0.00809173,
+              -0.08581851,
+              0.06507323,
+              -0.085427366,
+              0.027997404,
+              0.029847065,
+              -0.031673994,
+              -0.08560956,
+              0.1017672,
+              2.1855676e-33,
+              0.01160785,
+              0.077607885,
+              -0.017380483,
+              0.005239329,
+              0.0009684126,
+              0.06543702,
+              0.07256893,
+              -0.044318836,
+              -0.04749324,
+              0.14031002,
+              -0.025741624,
+              0.0057860985,
+              0.040946104,
+              -0.054880083,
+              0.074413285,
+              -0.023610368,
+              0.018364722,
+              -0.060585637,
+              -0.044149306,
+              0.0027854694,
+              -0.04580664,
+              0.1172219,
+              0.10268574,
+              0.07907412,
+              -0.0466143,
+              0.018618405,
+              0.029834948,
+              0.037265483,
+              0.02273822,
+              -0.0026589038,
+              0.041726097,
+              0.06439532,
+              -0.089163445,
+              0.018188318,
+              0.024064727,
+              -0.096389584,
+              0.08642254,
+              -0.05389359,
+              0.01923105,
+              0.045092683,
+              0.045125954,
+              0.09655961,
+              0.014908797,
+              0.059611585,
+              0.03066662,
+              0.05882299,
+              0.111484826,
+              0.016632542,
+              0.011590394,
+              -0.023702666,
+              -0.008617484,
+              -0.055030316,
+              0.047606383,
+              -0.014632687,
+              -0.014156344,
+              0.069926,
+              0.032047603,
+              0.042642817,
+              -0.053942375,
+              0.031047028,
+              0.009216673,
+              0.033024028,
+              -0.019033706,
+              0.005568194,
+              -0.014985451,
+              -0.09193244,
+              -0.03210824,
+              0.015367608,
+              0.029150328,
+              0.01250386,
+              -0.004827391,
+              0.023345906,
+              -0.028271332,
+              -0.08454125,
+              0.051068563,
+              -0.0133641455,
+              -0.029022738,
+              -0.02258452,
+              0.010884119,
+              -0.009810021,
+              0.049751773,
+              -0.0032637494,
+              -0.038813565,
+              0.027924104,
+              0.017925078,
+              0.005337612,
+              0.058691237,
+              0.09577674,
+              -0.014308608,
+              0.006972794,
+              -0.02733344,
+              0.06912433,
+              0.05727631,
+              0.03206042,
+              0.0042422824,
+              -1.6766318e-08,
+              -0.036354303,
+              -0.09146416,
+              -0.026319364,
+              -0.007941995,
+              -0.024127059,
+              0.09896698,
+              -0.04723083,
+              -0.03767135,
+              -0.029419973,
+              -0.022513283,
+              0.04125822,
+              -0.0011487947,
+              -0.05570366,
+              0.020679709,
+              -0.038118906,
+              -0.0524994,
+              -0.02624128,
+              -0.05336954,
+              -0.040593866,
+              -0.0073642326,
+              -0.0014442836,
+              0.02714257,
+              0.027141048,
+              0.00932513,
+              -0.00026505854,
+              0.038233075,
+              0.037096914,
+              0.08405413,
+              -0.06340637,
+              -0.014856458,
+              0.05038612,
+              0.06703033,
+              0.027668556,
+              -0.04360097,
+              -0.012041474,
+              0.08500689,
+              0.111594744,
+              0.1046117,
+              0.019726463,
+              -0.0003025109,
+              -0.04110389,
+              0.009575226,
+              -0.05285304,
+              -0.0026365265,
+              -0.031144748,
+              -0.08860188,
+              -0.06762232,
+              -0.07451522,
+              -0.053012833,
+              -0.09560941,
+              -0.05273455,
+              0.013032144,
+              0.0029190276,
+              0.041905046,
+              -0.04522114,
+              0.016730292,
+              0.017214278,
+              0.021578068,
+              -0.03718778,
+              0.02353425,
+              0.052041385,
+              0.06444499,
+              0.02387539,
+              -0.025236009
+            ],
+            "index": 0,
+            "object": "embedding"
+          }
+        ],
+        "model": "all-minilm:l6-v2",
+        "object": "list",
+        "usage": {
+          "prompt_tokens": 6,
+          "total_tokens": 6
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/2afe3b38ca01.json
+++ b/tests/integration/recordings/responses/2afe3b38ca01.json
@ -22,7 +22,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:01.887809Z",
+          "created_at": "2025-09-03T17:37:50.436472Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -40,7 +40,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:01.942369Z",
+          "created_at": "2025-09-03T17:37:50.478138Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -58,7 +58,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:01.99605Z",
+          "created_at": "2025-09-03T17:37:50.519952Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -76,7 +76,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:02.049974Z",
+          "created_at": "2025-09-03T17:37:50.561433Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -94,7 +94,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:02.102027Z",
+          "created_at": "2025-09-03T17:37:50.603624Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -112,7 +112,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:02.158416Z",
+          "created_at": "2025-09-03T17:37:50.645851Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -130,7 +130,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:02.211753Z",
+          "created_at": "2025-09-03T17:37:50.688403Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -148,7 +148,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:02.265564Z",
+          "created_at": "2025-09-03T17:37:50.72991Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -166,7 +166,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:02.31618Z",
+          "created_at": "2025-09-03T17:37:50.771635Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -184,7 +184,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:02.370325Z",
+          "created_at": "2025-09-03T17:37:50.813711Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -202,7 +202,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:02.424667Z",
+          "created_at": "2025-09-03T17:37:50.856201Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -220,7 +220,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:02.47913Z",
+          "created_at": "2025-09-03T17:37:50.899048Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -238,15 +238,15 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:02.536984Z",
+          "created_at": "2025-09-03T17:37:50.94069Z",
          "done": true,
          "done_reason": "stop",
-          "total_duration": 1042724125,
-          "load_duration": 86161375,
+          "total_duration": 688370708,
+          "load_duration": 107469833,
          "prompt_eval_count": 399,
-          "prompt_eval_duration": 305000000,
+          "prompt_eval_duration": 74988334,
          "eval_count": 13,
-          "eval_duration": 650000000,
+          "eval_duration": 505216458,
          "response": "",
          "thinking": null,
          "context": null
--- a/tests/integration/recordings/responses/2d187a11704c.json
+++ b/tests/integration/recordings/responses/2d187a11704c.json
@ -22,7 +22,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:11.938867Z",
+          "created_at": "2025-09-03T17:37:56.566151Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -40,7 +40,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:11.991247Z",
+          "created_at": "2025-09-03T17:37:56.609308Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -58,7 +58,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.043953Z",
+          "created_at": "2025-09-03T17:37:56.651314Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -76,7 +76,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.096001Z",
+          "created_at": "2025-09-03T17:37:56.693185Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -94,7 +94,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.150454Z",
+          "created_at": "2025-09-03T17:37:56.734643Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -112,7 +112,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.201249Z",
+          "created_at": "2025-09-03T17:37:56.776343Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -130,7 +130,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.252534Z",
+          "created_at": "2025-09-03T17:37:56.81705Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -148,7 +148,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.30063Z",
+          "created_at": "2025-09-03T17:37:56.857959Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -166,7 +166,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.351034Z",
+          "created_at": "2025-09-03T17:37:56.899424Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -184,7 +184,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.405032Z",
+          "created_at": "2025-09-03T17:37:56.939218Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -202,7 +202,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.462645Z",
+          "created_at": "2025-09-03T17:37:56.980065Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -220,7 +220,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.520337Z",
+          "created_at": "2025-09-03T17:37:57.02214Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -238,7 +238,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.575809Z",
+          "created_at": "2025-09-03T17:37:57.0628Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -256,7 +256,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.633724Z",
+          "created_at": "2025-09-03T17:37:57.106061Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -274,7 +274,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.683133Z",
+          "created_at": "2025-09-03T17:37:57.1492Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -292,7 +292,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.734309Z",
+          "created_at": "2025-09-03T17:37:57.190075Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -310,7 +310,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.785917Z",
+          "created_at": "2025-09-03T17:37:57.23178Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -328,7 +328,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.835705Z",
+          "created_at": "2025-09-03T17:37:57.272738Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -346,7 +346,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.886509Z",
+          "created_at": "2025-09-03T17:37:57.313855Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -364,7 +364,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.937134Z",
+          "created_at": "2025-09-03T17:37:57.354964Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -382,7 +382,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:12.988532Z",
+          "created_at": "2025-09-03T17:37:57.395971Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -400,7 +400,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.041798Z",
+          "created_at": "2025-09-03T17:37:57.438471Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -418,7 +418,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.095443Z",
+          "created_at": "2025-09-03T17:37:57.479796Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -436,7 +436,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.151402Z",
+          "created_at": "2025-09-03T17:37:57.520641Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -454,7 +454,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.203462Z",
+          "created_at": "2025-09-03T17:37:57.561511Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -472,7 +472,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.254567Z",
+          "created_at": "2025-09-03T17:37:57.602875Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -490,7 +490,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.305865Z",
+          "created_at": "2025-09-03T17:37:57.643406Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -508,7 +508,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.357658Z",
+          "created_at": "2025-09-03T17:37:57.684279Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -526,7 +526,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.407773Z",
+          "created_at": "2025-09-03T17:37:57.725699Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -544,7 +544,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.458919Z",
+          "created_at": "2025-09-03T17:37:57.766658Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -562,7 +562,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.510456Z",
+          "created_at": "2025-09-03T17:37:57.80738Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -580,7 +580,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.565948Z",
+          "created_at": "2025-09-03T17:37:57.848466Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -598,7 +598,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.619155Z",
+          "created_at": "2025-09-03T17:37:57.889056Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -616,7 +616,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.672754Z",
+          "created_at": "2025-09-03T17:37:57.931554Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -634,7 +634,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.729473Z",
+          "created_at": "2025-09-03T17:37:57.974754Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -652,7 +652,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.788666Z",
+          "created_at": "2025-09-03T17:37:58.016978Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -670,7 +670,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.850575Z",
+          "created_at": "2025-09-03T17:37:58.057942Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -688,7 +688,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.904807Z",
+          "created_at": "2025-09-03T17:37:58.099015Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -706,7 +706,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:13.958524Z",
+          "created_at": "2025-09-03T17:37:58.140531Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -724,7 +724,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.011742Z",
+          "created_at": "2025-09-03T17:37:58.181382Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -742,7 +742,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.064933Z",
+          "created_at": "2025-09-03T17:37:58.223318Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -760,7 +760,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.116454Z",
+          "created_at": "2025-09-03T17:37:58.26358Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -778,7 +778,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.172682Z",
+          "created_at": "2025-09-03T17:37:58.305496Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -796,7 +796,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.227654Z",
+          "created_at": "2025-09-03T17:37:58.347254Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -814,7 +814,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.282068Z",
+          "created_at": "2025-09-03T17:37:58.390044Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -832,7 +832,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.334565Z",
+          "created_at": "2025-09-03T17:37:58.430867Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -850,7 +850,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.383532Z",
+          "created_at": "2025-09-03T17:37:58.471376Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -868,7 +868,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.432138Z",
+          "created_at": "2025-09-03T17:37:58.51208Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -886,7 +886,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.480995Z",
+          "created_at": "2025-09-03T17:37:58.553226Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -904,7 +904,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.531968Z",
+          "created_at": "2025-09-03T17:37:58.594787Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -922,7 +922,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.584044Z",
+          "created_at": "2025-09-03T17:37:58.63466Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -940,7 +940,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.635691Z",
+          "created_at": "2025-09-03T17:37:58.674628Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -958,7 +958,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.68837Z",
+          "created_at": "2025-09-03T17:37:58.714616Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -976,7 +976,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.73985Z",
+          "created_at": "2025-09-03T17:37:58.754906Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -994,7 +994,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.792412Z",
+          "created_at": "2025-09-03T17:37:58.795048Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1012,7 +1012,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.845872Z",
+          "created_at": "2025-09-03T17:37:58.835297Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1030,7 +1030,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.900102Z",
+          "created_at": "2025-09-03T17:37:58.875738Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1048,7 +1048,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:14.954589Z",
+          "created_at": "2025-09-03T17:37:58.91604Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1066,7 +1066,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.006629Z",
+          "created_at": "2025-09-03T17:37:58.956596Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1084,7 +1084,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.058561Z",
+          "created_at": "2025-09-03T17:37:58.996664Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1102,7 +1102,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.111954Z",
+          "created_at": "2025-09-03T17:37:59.037796Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1120,7 +1120,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.169173Z",
+          "created_at": "2025-09-03T17:37:59.078586Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1138,7 +1138,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.222569Z",
+          "created_at": "2025-09-03T17:37:59.119448Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1156,7 +1156,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.275795Z",
+          "created_at": "2025-09-03T17:37:59.160318Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1174,7 +1174,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.3327Z",
+          "created_at": "2025-09-03T17:37:59.201852Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1192,7 +1192,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.389931Z",
+          "created_at": "2025-09-03T17:37:59.243763Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1210,7 +1210,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.442349Z",
+          "created_at": "2025-09-03T17:37:59.284948Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1228,7 +1228,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.494175Z",
+          "created_at": "2025-09-03T17:37:59.325598Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1246,7 +1246,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.545764Z",
+          "created_at": "2025-09-03T17:37:59.366289Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1264,7 +1264,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.599099Z",
+          "created_at": "2025-09-03T17:37:59.406764Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1282,7 +1282,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.649852Z",
+          "created_at": "2025-09-03T17:37:59.447922Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1300,7 +1300,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.698222Z",
+          "created_at": "2025-09-03T17:37:59.488486Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1318,7 +1318,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.747168Z",
+          "created_at": "2025-09-03T17:37:59.529Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1336,7 +1336,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.797196Z",
+          "created_at": "2025-09-03T17:37:59.569417Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1354,7 +1354,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.845587Z",
+          "created_at": "2025-09-03T17:37:59.610542Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1372,7 +1372,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.897171Z",
+          "created_at": "2025-09-03T17:37:59.651411Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1390,7 +1390,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.944524Z",
+          "created_at": "2025-09-03T17:37:59.69241Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1408,7 +1408,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:15.994467Z",
+          "created_at": "2025-09-03T17:37:59.732339Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1426,7 +1426,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.045224Z",
+          "created_at": "2025-09-03T17:37:59.772462Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1444,7 +1444,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.093853Z",
+          "created_at": "2025-09-03T17:37:59.812507Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1462,7 +1462,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.144847Z",
+          "created_at": "2025-09-03T17:37:59.852762Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1480,7 +1480,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.197888Z",
+          "created_at": "2025-09-03T17:37:59.892984Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1498,7 +1498,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.250854Z",
+          "created_at": "2025-09-03T17:37:59.933555Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1516,7 +1516,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.301995Z",
+          "created_at": "2025-09-03T17:37:59.973778Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1534,7 +1534,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.352508Z",
+          "created_at": "2025-09-03T17:38:00.014923Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1552,7 +1552,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.40259Z",
+          "created_at": "2025-09-03T17:38:00.057464Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1570,7 +1570,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.453514Z",
+          "created_at": "2025-09-03T17:38:00.09902Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1588,7 +1588,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.50378Z",
+          "created_at": "2025-09-03T17:38:00.140492Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1606,7 +1606,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.554395Z",
+          "created_at": "2025-09-03T17:38:00.180239Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1624,7 +1624,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.605795Z",
+          "created_at": "2025-09-03T17:38:00.220364Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1642,7 +1642,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.656313Z",
+          "created_at": "2025-09-03T17:38:00.26097Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1660,7 +1660,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.706438Z",
+          "created_at": "2025-09-03T17:38:00.301228Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1678,7 +1678,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.756444Z",
+          "created_at": "2025-09-03T17:38:00.341631Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1696,7 +1696,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.807687Z",
+          "created_at": "2025-09-03T17:38:00.383006Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1714,7 +1714,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.85835Z",
+          "created_at": "2025-09-03T17:38:00.423509Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1732,7 +1732,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.909311Z",
+          "created_at": "2025-09-03T17:38:00.464702Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1750,7 +1750,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:16.959327Z",
+          "created_at": "2025-09-03T17:38:00.505914Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1768,7 +1768,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:17.010211Z",
+          "created_at": "2025-09-03T17:38:00.546505Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1786,7 +1786,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:17.061365Z",
+          "created_at": "2025-09-03T17:38:00.587839Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -1804,15 +1804,15 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:17.111956Z",
+          "created_at": "2025-09-03T17:38:00.629018Z",
          "done": true,
          "done_reason": "stop",
-          "total_duration": 5499672375,
-          "load_duration": 58161750,
+          "total_duration": 4303339291,
+          "load_duration": 156231250,
          "prompt_eval_count": 36,
-          "prompt_eval_duration": 266000000,
+          "prompt_eval_duration": 81909875,
          "eval_count": 100,
-          "eval_duration": 5174000000,
+          "eval_duration": 4064559292,
          "response": "",
          "thinking": null,
          "context": null
--- a/tests/integration/recordings/responses/325a72db5755.json
+++ b/tests/integration/recordings/responses/325a72db5755.json
--- a/tests/integration/recordings/responses/382c2f22274c.json
+++ b/tests/integration/recordings/responses/382c2f22274c.json
@ -1,7 +1,7 @@
 {
  "request": {
    "method": "POST",
-    "url": "http://localhost:11434/v1/v1/chat/completions",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
@ -22,14 +22,14 @@
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
-        "id": "chatcmpl-339",
+        "id": "chatcmpl-442",
        "choices": [
          {
            "finish_reason": "length",
            "index": 0,
            "logprobs": null,
            "message": {
-              "content": "I can guide you through the process, but please note that this is not an official OpenAI API call. OpenAI's API terms and conditions prohibit using their models for malicious purposes.\n\nTo test a model like \"text-temperature\" with a temperature of 0 (i.e., no noise or randomness), we'll need to use a third-party library that connects to the OpenAI API. One such library is `transformers`.\n\nFirst, you need to install the `transformers` and `",
+              "content": "I can guide you on how to use the `test-temperature` parameter with OpenAI's API, but please note that using a temperature of 0 may not produce meaningful results. Temperature is a hyperparameter that controls the level of randomness in the model's output.\n\nOpenAI's API uses a variant of the GPT-3 model, which is trained on a large corpus of text data. The `test-temperature` parameter allows you to adjust the level of randomness in the model's output",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
@ -39,7 +39,7 @@
            }
          }
        ],
-        "created": 1754510065,
+        "created": 1756921254,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
--- a/tests/integration/recordings/responses/3c0bf9ba81b2.json
+++ b/tests/integration/recordings/responses/3c0bf9ba81b2.json
@ -20,14 +20,14 @@
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
-        "id": "chatcmpl-651",
+        "id": "chatcmpl-334",
        "choices": [
          {
            "finish_reason": "length",
            "index": 0,
            "logprobs": null,
            "message": {
-              "content": "I'm ready to help",
+              "content": "It looks like we've",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
@ -37,7 +37,7 @@
            }
          }
        ],
-        "created": 1755294941,
+        "created": 1756921086,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
--- a/tests/integration/recordings/responses/3c3f13cb7794.json
+++ b/tests/integration/recordings/responses/3c3f13cb7794.json
@ -21,7 +21,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:11.338232Z",
+          "created_at": "2025-09-03T17:36:18.136699Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -39,7 +39,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:11.39419Z",
+          "created_at": "2025-09-03T17:36:18.177622Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -57,7 +57,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:11.445346Z",
+          "created_at": "2025-09-03T17:36:18.218104Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -75,7 +75,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:11.496701Z",
+          "created_at": "2025-09-03T17:36:18.258837Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -93,7 +93,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:11.546804Z",
+          "created_at": "2025-09-03T17:36:18.299715Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -111,7 +111,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:11.601009Z",
+          "created_at": "2025-09-03T17:36:18.341602Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -129,7 +129,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:11.652788Z",
+          "created_at": "2025-09-03T17:36:18.385504Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -147,7 +147,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:11.703325Z",
+          "created_at": "2025-09-03T17:36:18.429427Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -165,7 +165,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:11.754033Z",
+          "created_at": "2025-09-03T17:36:18.473547Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -183,7 +183,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:11.804654Z",
+          "created_at": "2025-09-03T17:36:18.516327Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -201,15 +201,15 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:11.854841Z",
+          "created_at": "2025-09-03T17:36:18.559332Z",
          "done": true,
          "done_reason": "stop",
-          "total_duration": 652371000,
-          "load_duration": 42086042,
+          "total_duration": 628034000,
+          "load_duration": 116384417,
          "prompt_eval_count": 26,
-          "prompt_eval_duration": 78000000,
+          "prompt_eval_duration": 87798792,
          "eval_count": 11,
-          "eval_duration": 531000000,
+          "eval_duration": 423189583,
          "response": "",
          "thinking": null,
          "context": null
--- a/tests/integration/recordings/responses/3ca695048bee.json
+++ b/tests/integration/recordings/responses/3ca695048bee.json
@ -1,7 +1,7 @@
 {
  "request": {
    "method": "POST",
-    "url": "http://localhost:11434/v1/v1/chat/completions",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
@ -39,7 +39,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-490",
+          "id": "chatcmpl-3",
          "choices": [
            {
              "delta": {
@ -50,7 +50,7 @@
                "tool_calls": [
                  {
                    "index": 0,
-                    "id": "call_rolv1ozt",
+                    "id": "call_3kigugt3",
                    "function": {
                      "arguments": "{\"city\":\"Tokyo\"}",
                      "name": "get_weather"
@ -64,7 +64,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081852,
+          "created": 1756921361,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -75,7 +75,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-490",
+          "id": "chatcmpl-3",
          "choices": [
            {
              "delta": {
@ -85,12 +85,12 @@
                "role": "assistant",
                "tool_calls": null
              },
-              "finish_reason": "stop",
+              "finish_reason": "tool_calls",
              "index": 0,
              "logprobs": null
            }
          ],
-          "created": 1754081852,
+          "created": 1756921361,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
--- a/tests/integration/recordings/responses/3dff18060ebc.json
+++ b/tests/integration/recordings/responses/3dff18060ebc.json
@ -0,0 +1,422 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
+    "headers": {},
+    "body": {
+      "model": "all-minilm:l6-v2",
+      "input": [
+        "The secret string is foobazbar."
+      ],
+      "encoding_format": "float"
+    },
+    "endpoint": "/v1/embeddings",
+    "model": "all-minilm:l6-v2"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
+      "__data__": {
+        "data": [
+          {
+            "embedding": [
+              -0.060630284,
+              0.06372823,
+              -0.059383437,
+              -0.010313639,
+              -0.11985778,
+              0.033409074,
+              0.056847293,
+              -0.0064553,
+              0.029896382,
+              -0.05037607,
+              0.015193001,
+              -0.0634204,
+              0.015119892,
+              -0.08354324,
+              0.0092577925,
+              0.044272587,
+              -0.024397198,
+              -0.05100177,
+              -0.028086444,
+              -0.07390362,
+              0.07088186,
+              0.08101153,
+              0.006050408,
+              -0.043090094,
+              0.010714593,
+              -0.01581376,
+              0.0351736,
+              0.06538307,
+              0.03639655,
+              -0.05625738,
+              0.073681176,
+              0.04730274,
+              0.067169026,
+              -0.01207242,
+              -0.018193275,
+              0.0042488067,
+              0.029168725,
+              0.0067459582,
+              0.037927665,
+              0.0024767139,
+              0.014044963,
+              0.022671249,
+              -0.090508185,
+              0.041952047,
+              -0.07933115,
+              0.031992197,
+              -0.038355146,
+              0.037013844,
+              -0.0036946274,
+              -0.016986867,
+              0.03696087,
+              -0.07697335,
+              -0.020080294,
+              0.07733012,
+              0.04521822,
+              -0.007816803,
+              -0.0058926586,
+              0.009962128,
+              0.033492323,
+              0.09000152,
+              0.016161384,
+              0.036999356,
+              -0.039193578,
+              -0.010969346,
+              0.023929566,
+              -0.03698458,
+              -0.008227196,
+              0.018780757,
+              -0.0006967325,
+              -0.062018193,
+              -0.030388007,
+              -0.037649162,
+              -0.04654288,
+              0.038450293,
+              -0.010377299,
+              -0.032971557,
+              0.013547814,
+              -0.059036925,
+              0.0630603,
+              0.0159564,
+              -0.04845087,
+              -0.069917254,
+              -0.022502322,
+              0.04408022,
+              0.03618941,
+              0.060470726,
+              -0.04313285,
+              0.028797466,
+              0.0062393937,
+              0.01027349,
+              -0.078714885,
+              -0.091531575,
+              0.04391341,
+              0.013202597,
+              -0.0037814155,
+              0.0102497,
+              0.020225797,
+              0.05634384,
+              -0.09700619,
+              0.06577961,
+              0.047118917,
+              0.01876648,
+              0.12445029,
+              -0.06447121,
+              -0.012632697,
+              0.016056264,
+              0.08604982,
+              0.024878234,
+              0.10627678,
+              -0.043176394,
+              -0.046339765,
+              -0.03149599,
+              -0.001784808,
+              -0.023469802,
+              -0.05079461,
+              0.0046657966,
+              0.043237828,
+              0.057146583,
+              -0.065833576,
+              0.032975562,
+              -0.028763266,
+              0.037831448,
+              0.00017829033,
+              0.043322463,
+              -0.13265091,
+              0.0263673,
+              -0.04247752,
+              -3.3340873e-33,
+              -0.0022191573,
+              0.050657377,
+              0.028066125,
+              -0.033898965,
+              -0.0045730886,
+              -0.034653578,
+              -0.08628417,
+              0.043108672,
+              0.01022734,
+              0.044009056,
+              -0.03020062,
+              -0.0936044,
+              -0.06522928,
+              -0.059762992,
+              0.037560984,
+              -0.025942331,
+              -0.06655938,
+              0.0043691625,
+              0.018846871,
+              -0.035582166,
+              0.02240012,
+              0.08943218,
+              0.033568345,
+              -0.11379316,
+              0.03822112,
+              -0.044403847,
+              0.10261262,
+              -0.07330182,
+              0.089390896,
+              0.056668896,
+              -0.009407597,
+              -0.0646505,
+              0.016652016,
+              0.007326742,
+              0.005187682,
+              0.0051324354,
+              -0.013595071,
+              -0.04918112,
+              -0.06672084,
+              0.010838405,
+              0.04638185,
+              -0.11490209,
+              -0.055054087,
+              0.040443793,
+              -0.032746885,
+              0.03498173,
+              -0.023567867,
+              -0.012213799,
+              0.048050664,
+              0.01159698,
+              0.007860181,
+              0.03801084,
+              -0.027765153,
+              0.003296162,
+              -0.0033349432,
+              0.006083357,
+              0.03200884,
+              0.048306234,
+              0.013800832,
+              0.036165927,
+              -0.022672432,
+              0.09197581,
+              0.029846204,
+              0.08112345,
+              -0.08677228,
+              -0.028041098,
+              0.0556574,
+              -0.030357547,
+              -0.016538681,
+              0.031826265,
+              -0.07586954,
+              -0.009915978,
+              0.028101236,
+              0.002207158,
+              -0.10496646,
+              -0.023673821,
+              -0.024204832,
+              -0.0003132271,
+              0.0016462951,
+              -0.037603874,
+              0.025533162,
+              -0.05221861,
+              0.021656586,
+              0.099111386,
+              -0.06896361,
+              -0.018568028,
+              0.07245527,
+              -0.10582686,
+              -0.08505038,
+              -0.029969748,
+              -0.015717981,
+              -0.056855034,
+              -0.02698479,
+              -0.06410572,
+              0.0057078917,
+              1.2902391e-33,
+              0.05490771,
+              -0.036417797,
+              -0.0023541928,
+              -0.03591478,
+              0.106852315,
+              -0.04931468,
+              0.037884213,
+              0.050633065,
+              -0.083874516,
+              -0.018756155,
+              0.0036251817,
+              0.028974183,
+              -0.0027879397,
+              -0.036439158,
+              0.11148004,
+              0.051007163,
+              0.040258586,
+              0.09245398,
+              -0.01367112,
+              -0.070999645,
+              -0.043213032,
+              -0.060117763,
+              -0.03019449,
+              0.009107182,
+              -0.044254936,
+              0.04843456,
+              0.117205575,
+              -0.009833911,
+              0.0023962231,
+              0.09339494,
+              -0.059902366,
+              0.0101377955,
+              -0.03777244,
+              -0.04344207,
+              -0.14677393,
+              -0.022666233,
+              -0.008934328,
+              -0.02157697,
+              -0.021902358,
+              -0.06611372,
+              0.016243221,
+              0.062620856,
+              0.01056146,
+              0.04721975,
+              -0.087221384,
+              0.009420561,
+              -0.017691165,
+              -0.03847053,
+              0.010398396,
+              0.022942957,
+              0.099518456,
+              -0.021421565,
+              0.0016765085,
+              -0.039359514,
+              0.01641369,
+              0.039669517,
+              -0.119695365,
+              0.009885617,
+              0.003855461,
+              0.018273395,
+              -0.0454586,
+              0.0020496584,
+              0.024263415,
+              0.016978405,
+              0.06884217,
+              -0.027432522,
+              -0.01813802,
+              0.053840507,
+              -0.028815664,
+              -0.045221787,
+              0.11472852,
+              0.019796453,
+              -0.05785514,
+              0.016556906,
+              -0.07362942,
+              0.04025756,
+              -0.01510899,
+              0.0067040483,
+              -0.049666926,
+              0.045941774,
+              0.077951804,
+              -0.042951427,
+              0.021852365,
+              0.063826546,
+              0.08110754,
+              -0.070652775,
+              -0.03245094,
+              0.09259784,
+              -0.020451743,
+              0.0701599,
+              -0.020740295,
+              0.09339449,
+              -0.051164806,
+              0.039440546,
+              0.02560772,
+              -1.6767814e-08,
+              0.001529873,
+              0.0080792755,
+              -0.017666567,
+              -0.034070052,
+              0.06805411,
+              0.07387949,
+              -0.07592055,
+              -0.11369049,
+              -0.022008128,
+              0.009088418,
+              0.03108134,
+              -0.0056734695,
+              -0.0462051,
+              0.0037219985,
+              0.013269294,
+              -0.03213892,
+              -0.05557376,
+              -0.010602884,
+              0.006751397,
+              -0.025462827,
+              -0.0836812,
+              0.08886153,
+              0.005159859,
+              -0.051621262,
+              -0.051873572,
+              0.039706588,
+              -0.042155124,
+              0.057125967,
+              0.088910565,
+              0.049736783,
+              0.04144574,
+              0.094677895,
+              -0.037107926,
+              -0.06845684,
+              -0.061673928,
+              0.09891817,
+              -0.05952751,
+              -0.0331722,
+              -0.026014913,
+              0.077612035,
+              0.056150436,
+              0.010709955,
+              0.018974187,
+              0.056079865,
+              -0.041700333,
+              -0.02731697,
+              0.10184176,
+              -0.036189064,
+              -0.029914921,
+              -0.043333948,
+              0.043660097,
+              0.018800316,
+              -0.0042763646,
+              0.055898346,
+              -0.0034344571,
+              0.060258396,
+              -0.1337251,
+              0.008184424,
+              -0.031549457,
+              0.022398692,
+              0.037932154,
+              0.024529235,
+              0.068037644,
+              0.07021777
+            ],
+            "index": 0,
+            "object": "embedding"
+          }
+        ],
+        "model": "all-minilm:l6-v2",
+        "object": "list",
+        "usage": {
+          "prompt_tokens": 9,
+          "total_tokens": 9
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/417020320684.json
+++ b/tests/integration/recordings/responses/417020320684.json
@ -0,0 +1,422 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
+    "headers": {},
+    "body": {
+      "model": "all-minilm:l6-v2",
+      "input": [
+        "Python programming language"
+      ],
+      "encoding_format": "float"
+    },
+    "endpoint": "/v1/embeddings",
+    "model": "all-minilm:l6-v2"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
+      "__data__": {
+        "data": [
+          {
+            "embedding": [
+              -0.063880146,
+              0.013411989,
+              -0.054502595,
+              0.01193493,
+              -0.074262686,
+              -0.13344447,
+              0.04294062,
+              0.045387108,
+              -0.06949706,
+              -0.035939943,
+              0.01200873,
+              0.0068830596,
+              0.08886977,
+              0.0026030506,
+              0.032482542,
+              -0.007821568,
+              -0.05044649,
+              0.006662123,
+              0.027794942,
+              -0.12791364,
+              0.00062353734,
+              0.045270294,
+              -0.03605076,
+              0.044243146,
+              0.0129354475,
+              -0.0092799105,
+              0.011904844,
+              0.026060482,
+              0.020055141,
+              -0.03368774,
+              -0.028043076,
+              0.087557025,
+              0.059002083,
+              0.053893365,
+              0.02027196,
+              0.06840361,
+              -0.03180594,
+              -0.087597735,
+              -0.11277839,
+              0.022651086,
+              -0.09037903,
+              -0.0033202847,
+              -0.040132593,
+              -0.034084503,
+              -0.032953303,
+              0.02925268,
+              -0.03903928,
+              0.04551951,
+              -0.0331016,
+              -0.006518362,
+              -0.09629851,
+              -0.011739161,
+              -0.052575007,
+              -0.064773224,
+              0.031043475,
+              -0.012586444,
+              0.09737276,
+              0.005224713,
+              -0.035071153,
+              -0.1404299,
+              -0.06678175,
+              0.03654573,
+              -0.039277818,
+              0.07014256,
+              -0.0010227569,
+              -0.026846789,
+              -0.0175696,
+              0.03044068,
+              0.06403526,
+              -0.031643596,
+              -0.14598879,
+              -0.045400888,
+              -0.018469285,
+              0.06689445,
+              0.030553635,
+              -0.12255281,
+              0.061046645,
+              -0.05678168,
+              -0.005118667,
+              -0.0087622,
+              0.006514719,
+              -0.016424034,
+              -0.033650044,
+              0.08491301,
+              -0.00029260007,
+              -0.07339515,
+              0.038627055,
+              0.15695965,
+              0.010035773,
+              0.025318887,
+              -0.0021428047,
+              -0.04613549,
+              0.06244243,
+              -0.019905778,
+              -0.05471386,
+              0.09796629,
+              0.0384793,
+              -0.072424814,
+              -0.038704097,
+              0.07158691,
+              0.007360897,
+              -0.05120446,
+              0.0313513,
+              -0.032230332,
+              0.039326303,
+              -0.009643992,
+              0.069905065,
+              -0.052026685,
+              0.049440835,
+              -0.04272916,
+              -0.0037707465,
+              -0.04155246,
+              -0.0561972,
+              -0.03340213,
+              0.05105359,
+              0.038616214,
+              -0.0029470131,
+              0.08188407,
+              -0.0035886324,
+              0.04530431,
+              0.0068888925,
+              0.016499842,
+              0.016347302,
+              0.007283021,
+              -0.021663606,
+              -0.0046215886,
+              -0.007931065,
+              -4.1536508e-33,
+              -0.045777988,
+              -0.050903402,
+              -0.038634304,
+              0.0100991195,
+              0.070007294,
+              -0.025182785,
+              0.1050647,
+              -0.0049731904,
+              -0.064141616,
+              -0.047639705,
+              0.012718577,
+              0.05198462,
+              -0.016051587,
+              0.08170543,
+              0.024008816,
+              -0.020879291,
+              0.045706064,
+              0.091577366,
+              0.02512945,
+              0.019055998,
+              0.048144504,
+              0.097951256,
+              0.034154113,
+              0.03543114,
+              0.011410896,
+              -0.043446988,
+              -0.0041784984,
+              -0.05564714,
+              0.01147717,
+              0.0071039577,
+              -0.06426582,
+              -0.020623188,
+              -0.0045247558,
+              -0.012943628,
+              0.02658834,
+              -0.012385487,
+              0.008399212,
+              -0.06824828,
+              0.04683057,
+              -0.04165085,
+              -0.025662417,
+              -0.0038799767,
+              0.05007075,
+              -0.008117481,
+              -0.023308154,
+              0.023914568,
+              0.0015741173,
+              0.046142872,
+              -0.06898886,
+              0.041611847,
+              0.0045286645,
+              -0.047628563,
+              0.054236773,
+              0.06972688,
+              -0.016889753,
+              0.04806098,
+              0.012714234,
+              0.0022186628,
+              -0.006355918,
+              -0.031550523,
+              0.023726372,
+              0.06859327,
+              0.077228814,
+              -0.01227583,
+              0.03901903,
+              0.034360897,
+              0.03032876,
+              0.058690928,
+              0.08030179,
+              0.06976231,
+              -0.09047136,
+              0.02376998,
+              -0.008751518,
+              0.038334776,
+              -0.02751323,
+              0.023137644,
+              0.027101006,
+              -0.08135271,
+              -0.010334998,
+              0.04730408,
+              -0.02033998,
+              -0.026008504,
+              -0.017415512,
+              -0.0035714875,
+              -0.018727385,
+              -0.037389226,
+              0.041064497,
+              0.05317889,
+              -0.0055602547,
+              -0.058561854,
+              -0.072036326,
+              -0.075019896,
+              0.04825644,
+              0.011348427,
+              -0.02259257,
+              1.3515749e-33,
+              0.006240622,
+              0.031606406,
+              -0.036119435,
+              -0.0016494404,
+              -0.08255665,
+              -0.06069396,
+              0.059934463,
+              0.014492232,
+              0.059514895,
+              0.027053975,
+              -0.011601325,
+              -0.057609312,
+              0.10365583,
+              -0.002784741,
+              0.07693759,
+              0.019432511,
+              -0.052210074,
+              0.015158053,
+              -0.0012768542,
+              0.027789148,
+              -0.115292676,
+              0.047323048,
+              -0.07599195,
+              -0.074344486,
+              -0.029194841,
+              -0.020079462,
+              -0.034749795,
+              -0.05769437,
+              -0.0301632,
+              0.04749987,
+              0.012206333,
+              0.011497502,
+              -0.051970575,
+              0.05972769,
+              0.03281016,
+              0.0013676677,
+              0.057720944,
+              -0.041179247,
+              -0.02150875,
+              -0.0067487382,
+              0.1419711,
+              0.05795878,
+              0.010094941,
+              0.09603845,
+              0.014521089,
+              0.02133803,
+              -0.07551916,
+              0.07887724,
+              -0.04273237,
+              -0.06601746,
+              -0.038729392,
+              -0.008161129,
+              0.015012324,
+              -0.049418066,
+              -0.037083283,
+              -0.02378242,
+              0.03743137,
+              0.008194503,
+              -0.086978436,
+              -0.05960285,
+              -0.07732487,
+              -0.056507926,
+              0.029065313,
+              0.0073954053,
+              -0.077878684,
+              0.0026059505,
+              -0.10405392,
+              -0.04738624,
+              -0.015872862,
+              -0.11591199,
+              0.09724705,
+              0.0049243565,
+              -0.010273523,
+              0.0066429917,
+              -0.060295314,
+              0.02550513,
+              -0.052950058,
+              -0.0038489713,
+              -0.050250847,
+              0.07679287,
+              0.046089787,
+              0.007386997,
+              0.0046740095,
+              0.07385862,
+              -0.07792065,
+              0.0013675193,
+              0.013730894,
+              0.05658653,
+              0.021934126,
+              0.007195913,
+              0.0076705213,
+              0.10221154,
+              0.060060997,
+              0.036779005,
+              -0.037765697,
+              -1.187368e-08,
+              -0.00885571,
+              0.01760442,
+              0.062224448,
+              0.032051455,
+              -0.011581793,
+              0.051908698,
+              -0.011685676,
+              -0.06391574,
+              -0.029866237,
+              0.03258576,
+              0.0055078953,
+              -0.012040446,
+              -0.054406017,
+              -0.056690563,
+              -0.030638037,
+              0.14276367,
+              0.028526368,
+              -0.028743364,
+              0.019917691,
+              0.025652615,
+              0.073813364,
+              -0.0066998666,
+              0.0061508445,
+              0.09610696,
+              -0.08799916,
+              -0.0089272335,
+              0.03823298,
+              0.04832936,
+              0.018829934,
+              -0.10534708,
+              0.048226915,
+              -0.02225069,
+              0.020491786,
+              0.014641141,
+              0.030794447,
+              -0.029119467,
+              0.008283775,
+              -0.04506887,
+              0.0025344177,
+              0.021756247,
+              -0.008108281,
+              0.00904927,
+              -0.013340866,
+              -0.014037631,
+              0.06845187,
+              0.045173325,
+              -0.034587316,
+              -0.07275669,
+              -0.004159724,
+              -0.058231864,
+              -0.033032075,
+              0.0040235794,
+              -0.019985583,
+              -0.020122562,
+              0.055365406,
+              0.10250875,
+              -0.10799118,
+              -0.013780294,
+              -0.009652406,
+              0.015592658,
+              -0.031221472,
+              0.1329332,
+              0.15243866,
+              -0.022426173
+            ],
+            "index": 0,
+            "object": "embedding"
+          }
+        ],
+        "model": "all-minilm:l6-v2",
+        "object": "list",
+        "usage": {
+          "prompt_tokens": 3,
+          "total_tokens": 3
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/4420515208a8.json
+++ b/tests/integration/recordings/responses/4420515208a8.json
@ -0,0 +1,422 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
+    "headers": {},
+    "body": {
+      "model": "all-minilm:l6-v2",
+      "input": [
+        "What is the secret string?"
+      ],
+      "encoding_format": "float"
+    },
+    "endpoint": "/v1/embeddings",
+    "model": "all-minilm:l6-v2"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
+      "__data__": {
+        "data": [
+          {
+            "embedding": [
+              -0.07473014,
+              0.08137506,
+              -0.06463602,
+              0.011821943,
+              -0.07454815,
+              0.021821007,
+              0.077573344,
+              0.012804661,
+              0.05853777,
+              -0.014141324,
+              0.053993534,
+              -0.026554074,
+              -0.018055506,
+              -0.060447972,
+              -0.019253474,
+              -0.006501444,
+              -0.047272332,
+              -0.048944764,
+              -0.090516366,
+              -0.06656194,
+              0.09287066,
+              0.02129739,
+              -0.013401809,
+              -0.006629013,
+              0.0079892,
+              0.016818035,
+              0.03971694,
+              0.021875564,
+              0.014873574,
+              -0.039426163,
+              0.025255844,
+              -0.036836684,
+              0.016627828,
+              0.008789532,
+              -0.053503897,
+              0.03616121,
+              -0.034633957,
+              -0.009877797,
+              0.064843215,
+              -0.01517806,
+              0.020897496,
+              -0.07135096,
+              -0.008519908,
+              0.05118655,
+              -0.062102985,
+              0.059486073,
+              -0.047937352,
+              0.07045817,
+              -0.024867272,
+              -0.010756205,
+              0.06538509,
+              -0.03693754,
+              -0.08240387,
+              0.08169191,
+              0.017090658,
+              0.012944557,
+              -0.047139525,
+              0.0025796075,
+              0.008701712,
+              0.099866174,
+              0.04969699,
+              -0.025922626,
+              -0.017354922,
+              0.03395182,
+              0.038391408,
+              -0.054247838,
+              0.008610521,
+              -0.04077977,
+              0.0265637,
+              -0.07186012,
+              -0.019953186,
+              -0.041191205,
+              -0.07246228,
+              0.00041248833,
+              0.018758524,
+              0.023036895,
+              0.01662864,
+              -0.06335885,
+              0.03495032,
+              0.050063577,
+              0.00043262896,
+              -0.06176693,
+              0.0062733325,
+              0.11142063,
+              0.0040838965,
+              0.085737824,
+              0.023284689,
+              0.05699812,
+              -0.03149832,
+              -0.013344509,
+              -0.045138564,
+              -0.117300816,
+              0.016063986,
+              -0.016894838,
+              -0.028934335,
+              0.03575864,
+              -0.05156192,
+              0.032958068,
+              -0.11266628,
+              0.06640015,
+              0.037839692,
+              0.022948038,
+              0.058071073,
+              -0.039643735,
+              -0.03247236,
+              0.017690921,
+              -0.005001274,
+              0.019046135,
+              0.07745316,
+              -0.020402163,
+              -0.020310633,
+              -0.009519755,
+              0.0031459313,
+              -0.0045639877,
+              -0.029116316,
+              0.033835515,
+              0.00050839526,
+              0.06419946,
+              0.010721198,
+              0.124151744,
+              -0.0053820186,
+              0.00491648,
+              -0.059696514,
+              0.029483523,
+              -0.13409872,
+              0.016187217,
+              -0.048092023,
+              -6.6084764e-33,
+              0.012305612,
+              0.060384244,
+              0.036461998,
+              -0.035974216,
+              -0.04197416,
+              0.012333701,
+              -0.084805995,
+              0.012502633,
+              0.02794982,
+              0.0861082,
+              -0.030791838,
+              -0.061355945,
+              -0.0009604986,
+              -0.0252044,
+              0.045444816,
+              -0.027590565,
+              -0.009594973,
+              0.006712001,
+              0.043692384,
+              -0.021483036,
+              0.003300438,
+              0.11860881,
+              0.047044385,
+              -0.1348901,
+              0.025469579,
+              -0.01029819,
+              0.0022393467,
+              -0.061863262,
+              0.10386513,
+              0.018658707,
+              -0.0017492755,
+              -0.051914047,
+              0.046442248,
+              0.03761067,
+              0.033752125,
+              0.006650237,
+              0.022015076,
+              -0.07834835,
+              -0.008209136,
+              0.027432231,
+              0.017393896,
+              -0.07524756,
+              0.006497012,
+              0.027272953,
+              0.0005804994,
+              -0.010941825,
+              -0.020050043,
+              -0.00012092298,
+              0.013705002,
+              0.004699541,
+              0.022770848,
+              0.015477994,
+              -0.0142482165,
+              -0.013953546,
+              0.015865315,
+              -0.023075614,
+              0.03379947,
+              -0.039221376,
+              -0.043229815,
+              0.02998769,
+              -0.01652291,
+              0.06981088,
+              0.04606923,
+              0.05332633,
+              -0.055300076,
+              0.02511626,
+              0.014049543,
+              -0.09398743,
+              0.03590562,
+              0.029452223,
+              -0.13200304,
+              -0.005059034,
+              -0.03784268,
+              -0.03180819,
+              -0.095502876,
+              -0.027853556,
+              0.0024331037,
+              -0.007881495,
+              0.058296,
+              -0.031999517,
+              -0.06077097,
+              -0.023381822,
+              -0.00048603877,
+              0.13765746,
+              -0.060579,
+              -0.008109843,
+              -0.034873307,
+              -0.1024547,
+              -0.009072849,
+              -0.018931676,
+              -0.0016711762,
+              -0.07710289,
+              -0.043332253,
+              -0.03619527,
+              0.03958017,
+              3.0217083e-33,
+              0.0050329794,
+              0.00016030145,
+              -0.063078895,
+              0.012225751,
+              0.10637338,
+              0.015972024,
+              0.006653195,
+              0.01880781,
+              -0.04708357,
+              0.045863643,
+              0.0076015075,
+              0.03243478,
+              0.032097474,
+              -0.020893326,
+              0.10697852,
+              0.0075498912,
+              0.036074348,
+              0.1462344,
+              0.03779065,
+              -0.043190572,
+              -0.02176097,
+              -0.009340132,
+              -0.06983617,
+              0.015578788,
+              0.021121953,
+              0.030661412,
+              0.08434581,
+              -0.09288574,
+              0.008169474,
+              0.078080945,
+              -0.081626564,
+              0.011895231,
+              0.017099649,
+              0.0040119104,
+              -0.14145434,
+              0.0040375097,
+              0.046316408,
+              0.008959473,
+              -0.0056506568,
+              -0.055587813,
+              0.028007837,
+              0.055937108,
+              0.062269785,
+              0.08602392,
+              -0.12157818,
+              0.021943888,
+              -0.0050934856,
+              0.029819332,
+              -0.012127162,
+              0.048801802,
+              0.06409215,
+              -0.041438665,
+              0.01809265,
+              -0.028214281,
+              -0.0213588,
+              0.05564267,
+              -0.1547868,
+              0.027465124,
+              0.018855799,
+              0.04327939,
+              0.011500479,
+              0.017364705,
+              -0.023216385,
+              0.051007293,
+              0.02946264,
+              0.012533944,
+              -0.04542834,
+              -0.002238765,
+              -0.05611544,
+              -0.0789272,
+              0.07960444,
+              -0.020431034,
+              -0.0762138,
+              0.011588508,
+              -0.035614885,
+              -0.04803985,
+              -0.06607436,
+              -0.057365946,
+              -0.040188126,
+              0.07176218,
+              0.03135825,
+              0.02303279,
+              -0.023997622,
+              0.023614945,
+              0.09607302,
+              -0.06843066,
+              0.014260722,
+              0.08802569,
+              -0.037736766,
+              0.029445928,
+              -0.028643936,
+              0.10217973,
+              -0.0660917,
+              0.022864237,
+              0.042151757,
+              -1.4814046e-08,
+              0.030838449,
+              0.043877687,
+              -0.0245681,
+              -0.09818859,
+              0.056659035,
+              0.0929652,
+              -0.010337853,
+              -0.0983916,
+              0.018008571,
+              -0.0131424805,
+              0.026400762,
+              0.008793538,
+              -0.05285605,
+              -0.042175982,
+              0.030133193,
+              0.01710666,
+              -0.06242493,
+              -0.018753909,
+              -0.015986755,
+              -0.018400662,
+              -0.026477808,
+              0.010281372,
+              -0.030476814,
+              -0.084556945,
+              -0.05402664,
+              0.010030052,
+              0.029531356,
+              0.13555466,
+              0.033426728,
+              0.12098221,
+              0.040777553,
+              0.008206964,
+              -0.018235989,
+              -0.0568263,
+              -0.1289943,
+              0.12416113,
+              -0.053454727,
+              -0.038151894,
+              0.030221034,
+              0.019807614,
+              0.047819767,
+              0.029434063,
+              0.0015704447,
+              0.0611775,
+              -0.05557245,
+              -0.030236417,
+              0.10799873,
+              -0.07073352,
+              -0.08215229,
+              0.004518122,
+              -0.015573616,
+              -0.013696145,
+              -0.0023438279,
+              0.026377691,
+              -0.015769389,
+              0.016251203,
+              -0.04062322,
+              -0.013962793,
+              -0.08309221,
+              0.031991288,
+              0.049991824,
+              -0.0038595141,
+              0.07031122,
+              0.0049263495
+            ],
+            "index": 0,
+            "object": "embedding"
+          }
+        ],
+        "model": "all-minilm:l6-v2",
+        "object": "list",
+        "usage": {
+          "prompt_tokens": 6,
+          "total_tokens": 6
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/44a1d9de0602.json
+++ b/tests/integration/recordings/responses/44a1d9de0602.json
@ -20,7 +20,7 @@
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
-        "id": "chatcmpl-987",
+        "id": "chatcmpl-507",
        "choices": [
          {
            "finish_reason": "length",
@ -37,7 +37,7 @@
            }
          }
        ],
-        "created": 1755294921,
+        "created": 1756921150,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
--- a/tests/integration/recordings/responses/44fb9cf5875f.json
+++ b/tests/integration/recordings/responses/44fb9cf5875f.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-31T17:59:42.166585642Z",
+        "created_at": "2025-09-03T17:41:49.581065Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 9490295253,
-        "load_duration": 42349084,
+        "total_duration": 2391571708,
+        "load_duration": 182022958,
        "prompt_eval_count": 20,
-        "prompt_eval_duration": 545470166,
+        "prompt_eval_duration": 74456583,
        "eval_count": 51,
-        "eval_duration": 8901928284,
+        "eval_duration": 2134471458,
        "response": "It seems like you're trying to test the system, but I'm not sure what specific functionality or feature you'd like to test. Could you please provide more context or clarify what you're looking for? I'll do my best to assist you!",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/48d2fb183a2a.json
+++ b/tests/integration/recordings/responses/48d2fb183a2a.json
@ -67,15 +67,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-08-04T22:55:40.583477Z",
+        "created_at": "2025-09-03T17:36:40.283084Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 3928481500,
-        "load_duration": 151903250,
+        "total_duration": 2900042958,
+        "load_duration": 83372125,
        "prompt_eval_count": 259,
-        "prompt_eval_duration": 468000000,
+        "prompt_eval_duration": 352890750,
        "eval_count": 60,
-        "eval_duration": 3306000000,
+        "eval_duration": 2462885208,
        "response": "{\n  \"first_name\": \"Michael\",\n  \"last_name\": \"Jordan\",\n  \"year_of_birth\": 1963,\n  \"nba_stats\": {\n    \"year_for_draft\": 1984,\n    \"num_seasons_in_nba\": 15\n  }\n}",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/50340cd4d253.json
+++ b/tests/integration/recordings/responses/50340cd4d253.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama-guard3:1b",
-        "created_at": "2025-08-01T23:14:19.298378Z",
+        "created_at": "2025-09-03T17:38:01.239743Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 266786083,
-        "load_duration": 53820458,
+        "total_duration": 207264667,
+        "load_duration": 73437959,
        "prompt_eval_count": 216,
-        "prompt_eval_duration": 192000000,
+        "prompt_eval_duration": 121657333,
        "eval_count": 2,
-        "eval_duration": 17000000,
+        "eval_duration": 11348417,
        "response": "safe",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/5370751803dc.json
+++ b/tests/integration/recordings/responses/5370751803dc.json
@ -0,0 +1,422 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
+    "headers": {},
+    "body": {
+      "model": "all-minilm:l6-v2",
+      "input": [
+        "Python is a high-level programming language with code readability and fewer lines than C++ or Java"
+      ],
+      "encoding_format": "float"
+    },
+    "endpoint": "/v1/embeddings",
+    "model": "all-minilm:l6-v2"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
+      "__data__": {
+        "data": [
+          {
+            "embedding": [
+              -0.07642644,
+              0.0213101,
+              -0.03612849,
+              -0.0012144424,
+              -0.048599217,
+              -0.13194773,
+              -0.084226094,
+              0.059389386,
+              -0.0617182,
+              -0.009323243,
+              -0.08099486,
+              0.055514984,
+              0.052610602,
+              0.026061919,
+              0.063071534,
+              -0.062316332,
+              -0.065115415,
+              -0.022351492,
+              0.017378356,
+              -0.11605584,
+              -0.036349725,
+              0.0404155,
+              -0.0325302,
+              -0.01770141,
+              0.05722761,
+              0.012393438,
+              -0.018529164,
+              -0.030017126,
+              0.002365914,
+              0.0066701965,
+              -0.08862459,
+              0.0779319,
+              0.03702611,
+              0.029523117,
+              -0.01977821,
+              0.05424799,
+              -0.00074063655,
+              -0.08949148,
+              -0.05312112,
+              -0.012703181,
+              -0.08622611,
+              0.07689996,
+              -0.038602136,
+              -0.011616902,
+              -0.03234132,
+              -0.0073969415,
+              -0.024779495,
+              -0.067999884,
+              -0.03039565,
+              -0.025974417,
+              -0.09690519,
+              0.009931951,
+              -0.05362519,
+              -0.09107193,
+              -0.009222061,
+              -0.008804084,
+              0.048185978,
+              -0.003329437,
+              -0.0058579347,
+              -0.13306528,
+              -0.09721703,
+              0.013474277,
+              0.047286008,
+              0.06279936,
+              -0.01582815,
+              -0.03771013,
+              -0.01651892,
+              0.029905442,
+              0.09326656,
+              -0.06746783,
+              -0.13385954,
+              -0.020873511,
+              -0.02586237,
+              0.11623731,
+              0.030632136,
+              -0.10494776,
+              0.03905967,
+              -0.010701787,
+              -0.0014734551,
+              0.020711906,
+              0.0017687598,
+              0.027797814,
+              -0.078500465,
+              0.10791581,
+              0.02910256,
+              -0.05398749,
+              0.030513834,
+              0.07001416,
+              -0.034323946,
+              0.00986597,
+              0.034644563,
+              -0.04232179,
+              0.065106474,
+              0.026648693,
+              -0.032122962,
+              0.07616709,
+              0.020026332,
+              -0.030642457,
+              -0.07188906,
+              0.027189687,
+              -0.018678213,
+              -0.05416582,
+              0.07488992,
+              0.017753933,
+              0.03386007,
+              0.02414506,
+              0.09077034,
+              -0.052096054,
+              0.040722203,
+              -0.018450806,
+              -0.012474094,
+              -0.06403705,
+              -0.023205942,
+              -0.061878704,
+              0.053436812,
+              0.047876816,
+              -0.010608645,
+              0.07852118,
+              0.03579911,
+              0.027097313,
+              0.022424318,
+              -0.004912598,
+              -0.02455264,
+              0.003700777,
+              0.00039888592,
+              -0.008842094,
+              0.009365857,
+              2.05052e-34,
+              -0.03236592,
+              -0.024301885,
+              0.027186498,
+              0.021633558,
+              0.06519107,
+              -0.019539308,
+              0.05306087,
+              0.007985293,
+              -0.03927361,
+              -0.020062907,
+              0.008070545,
+              0.02382429,
+              0.015006528,
+              0.1128094,
+              0.06113956,
+              -0.011911169,
+              0.016901307,
+              0.045509744,
+              0.0013988831,
+              0.00907712,
+              0.01314859,
+              -0.012022324,
+              0.027043821,
+              0.0071581583,
+              0.022573117,
+              -0.013721936,
+              -0.004378743,
+              -0.0007087661,
+              0.033585846,
+              0.011227843,
+              -0.05136015,
+              -0.0739591,
+              -0.03094639,
+              0.01957863,
+              -0.010360539,
+              -0.0029881562,
+              -0.00480912,
+              -0.10446798,
+              0.034694213,
+              -0.02424012,
+              -0.047155295,
+              0.035451673,
+              0.037169226,
+              -0.016986743,
+              0.0056092087,
+              0.05057555,
+              -0.008601115,
+              0.0060349177,
+              -0.12273999,
+              0.036871877,
+              -0.022267655,
+              -0.009739047,
+              0.075974636,
+              0.08902226,
+              0.01647873,
+              0.044345584,
+              0.06792565,
+              0.06456903,
+              -0.050189856,
+              -0.0016995457,
+              -0.00090498856,
+              0.09925942,
+              0.09253569,
+              -0.011321612,
+              0.050309792,
+              0.07697773,
+              0.0100068,
+              0.101032645,
+              0.03268899,
+              0.06433435,
+              -0.044524822,
+              0.03860177,
+              -0.019314477,
+              0.037440598,
+              -0.0017394378,
+              0.011816814,
+              0.011359969,
+              -0.1040215,
+              0.06984421,
+              0.01910163,
+              -0.028409261,
+              -0.013704911,
+              0.048502754,
+              -0.015429918,
+              -0.03423058,
+              -0.055616368,
+              0.005001686,
+              0.026054256,
+              -0.0007700968,
+              -0.0041726283,
+              -0.0640977,
+              -0.05985385,
+              0.0813829,
+              0.014288322,
+              -0.038147252,
+              -2.1576616e-33,
+              -0.027279941,
+              -0.034765568,
+              -0.02465107,
+              0.026859807,
+              -0.090699576,
+              -0.045698144,
+              0.013666582,
+              0.002109106,
+              0.054007426,
+              0.032838397,
+              -0.029939773,
+              -0.058843046,
+              0.09825693,
+              0.03251322,
+              0.109977886,
+              0.020682266,
+              -0.0958973,
+              0.0005566991,
+              0.0018037638,
+              0.017544486,
+              -0.06843023,
+              0.06435102,
+              -0.050149646,
+              -0.048880838,
+              -0.027535524,
+              -0.014993001,
+              -0.1210176,
+              -0.04412877,
+              -0.011025324,
+              0.058610573,
+              -0.007498303,
+              0.038722932,
+              -0.07025986,
+              0.030281536,
+              0.055707317,
+              -0.001162887,
+              0.01707519,
+              -0.042081844,
+              -0.016578361,
+              -0.025714336,
+              0.117893435,
+              0.04196084,
+              0.064787276,
+              0.046081997,
+              0.014950138,
+              0.030026693,
+              -0.039077066,
+              0.087156676,
+              -0.012328571,
+              -0.035646956,
+              -0.048145168,
+              0.041394625,
+              0.038984135,
+              -0.025188481,
+              -0.028836856,
+              -0.02917782,
+              0.029690607,
+              0.051454436,
+              -0.08629761,
+              -0.06921346,
+              -0.07273269,
+              -0.05952071,
+              0.0050034616,
+              0.025693603,
+              -0.022103382,
+              0.024972659,
+              -0.09724792,
+              0.0062089814,
+              -0.04963219,
+              -0.13054384,
+              0.124669954,
+              -0.01361085,
+              -0.022798477,
+              0.039057832,
+              -0.07550591,
+              0.049364913,
+              0.0007779102,
+              0.004692535,
+              -0.040757872,
+              0.06355995,
+              0.110190175,
+              0.02015945,
+              -0.048807338,
+              0.05842704,
+              -0.066375315,
+              0.026938869,
+              -0.062775925,
+              -0.014049011,
+              0.023343485,
+              0.02358394,
+              -0.002172394,
+              0.07766165,
+              0.031056313,
+              0.020171564,
+              -0.020073414,
+              -2.4317085e-08,
+              0.020261949,
+              -0.008623839,
+              0.0621209,
+              -0.008334477,
+              0.02526615,
+              0.08902315,
+              -0.007958188,
+              -0.018911751,
+              -0.035572145,
+              0.06189234,
+              -0.017249323,
+              -0.030186126,
+              -0.10225455,
+              -0.06522741,
+              -0.004033112,
+              0.10897627,
+              -0.02168822,
+              -0.053784374,
+              0.011841631,
+              0.052263785,
+              0.058334205,
+              0.0052479547,
+              -0.06017166,
+              0.08723854,
+              -0.08275336,
+              -0.040676847,
+              0.065786876,
+              0.028317772,
+              -0.012168614,
+              -0.07196286,
+              0.014588226,
+              -0.03231537,
+              0.0028357722,
+              0.03868031,
+              0.055439528,
+              -0.015238348,
+              0.05482384,
+              -0.025080629,
+              -0.033771332,
+              0.0030752022,
+              -0.037511814,
+              0.015122315,
+              0.02292684,
+              0.012024873,
+              0.03559873,
+              0.006865039,
+              -0.04049267,
+              -0.049685854,
+              -0.05455341,
+              -0.073071465,
+              -0.024902396,
+              -0.002133957,
+              -0.013212662,
+              -0.06657236,
+              0.023245512,
+              0.046919,
+              -0.13278763,
+              -0.011092663,
+              -0.023939205,
+              0.043182902,
+              0.024406029,
+              0.06922961,
+              0.15658055,
+              0.017658537
+            ],
+            "index": 0,
+            "object": "embedding"
+          }
+        ],
+        "model": "all-minilm:l6-v2",
+        "object": "list",
+        "usage": {
+          "prompt_tokens": 21,
+          "total_tokens": 21
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/545d86510a80.json
+++ b/tests/integration/recordings/responses/545d86510a80.json
@ -22,7 +22,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:38.59711Z",
+          "created_at": "2025-09-03T17:42:32.625862Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -40,7 +40,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:38.671294Z",
+          "created_at": "2025-09-03T17:42:32.668885Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -58,7 +58,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:38.736161Z",
+          "created_at": "2025-09-03T17:42:32.710947Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -76,7 +76,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:38.809857Z",
+          "created_at": "2025-09-03T17:42:32.752286Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -94,7 +94,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:38.883599Z",
+          "created_at": "2025-09-03T17:42:32.793309Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -112,7 +112,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:38.942471Z",
+          "created_at": "2025-09-03T17:42:32.834578Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -130,7 +130,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:38.999844Z",
+          "created_at": "2025-09-03T17:42:32.876536Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -148,7 +148,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:39.050862Z",
+          "created_at": "2025-09-03T17:42:32.918807Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -166,7 +166,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:39.104589Z",
+          "created_at": "2025-09-03T17:42:32.960101Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -184,7 +184,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:39.158301Z",
+          "created_at": "2025-09-03T17:42:33.00196Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -202,7 +202,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:39.210985Z",
+          "created_at": "2025-09-03T17:42:33.043876Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -220,7 +220,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:39.263525Z",
+          "created_at": "2025-09-03T17:42:33.08756Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -238,15 +238,15 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:39.314455Z",
+          "created_at": "2025-09-03T17:42:33.12966Z",
          "done": true,
          "done_reason": "stop",
-          "total_duration": 914060542,
-          "load_duration": 63705209,
+          "total_duration": 648814958,
+          "load_duration": 75300875,
          "prompt_eval_count": 408,
-          "prompt_eval_duration": 95000000,
+          "prompt_eval_duration": 66740291,
          "eval_count": 13,
-          "eval_duration": 753000000,
+          "eval_duration": 505313125,
          "response": "",
          "thinking": null,
          "context": null
--- a/tests/integration/recordings/responses/554de3cd986f.json
+++ b/tests/integration/recordings/responses/554de3cd986f.json
@ -22,7 +22,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:04.40585Z",
+          "created_at": "2025-09-03T17:37:51.805591Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -40,7 +40,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:04.455647Z",
+          "created_at": "2025-09-03T17:37:51.850067Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -58,7 +58,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:04.509581Z",
+          "created_at": "2025-09-03T17:37:51.892443Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -76,7 +76,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:04.56592Z",
+          "created_at": "2025-09-03T17:37:51.934364Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -94,7 +94,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:04.616979Z",
+          "created_at": "2025-09-03T17:37:51.978382Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -112,7 +112,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:04.671413Z",
+          "created_at": "2025-09-03T17:37:52.019332Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -130,7 +130,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:04.725494Z",
+          "created_at": "2025-09-03T17:37:52.060708Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -148,7 +148,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:04.779905Z",
+          "created_at": "2025-09-03T17:37:52.102717Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -166,7 +166,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:04.829791Z",
+          "created_at": "2025-09-03T17:37:52.143996Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -184,7 +184,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:04.880729Z",
+          "created_at": "2025-09-03T17:37:52.185479Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -202,7 +202,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:04.93338Z",
+          "created_at": "2025-09-03T17:37:52.227562Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -220,7 +220,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:04.981714Z",
+          "created_at": "2025-09-03T17:37:52.270178Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -238,7 +238,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:05.036068Z",
+          "created_at": "2025-09-03T17:37:52.31151Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -256,7 +256,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:05.088069Z",
+          "created_at": "2025-09-03T17:37:52.35278Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -274,7 +274,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:05.144485Z",
+          "created_at": "2025-09-03T17:37:52.393954Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -292,7 +292,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:05.203042Z",
+          "created_at": "2025-09-03T17:37:52.435238Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -310,7 +310,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:05.257133Z",
+          "created_at": "2025-09-03T17:37:52.476197Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -328,7 +328,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:05.311623Z",
+          "created_at": "2025-09-03T17:37:52.517914Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -346,15 +346,15 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:05.370124Z",
+          "created_at": "2025-09-03T17:37:52.55904Z",
          "done": true,
          "done_reason": "stop",
-          "total_duration": 1532801458,
-          "load_duration": 213911041,
+          "total_duration": 971882292,
+          "load_duration": 116634209,
          "prompt_eval_count": 376,
-          "prompt_eval_duration": 350000000,
+          "prompt_eval_duration": 99382958,
          "eval_count": 19,
-          "eval_duration": 967000000,
+          "eval_duration": 755260750,
          "response": "",
          "thinking": null,
          "context": null
--- a/tests/integration/recordings/responses/561746e1c8de.json
+++ b/tests/integration/recordings/responses/561746e1c8de.json
@ -21,7 +21,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-15T20:24:49.18651486Z",
+          "created_at": "2025-09-03T17:36:20.465701Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -39,7 +39,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-15T20:24:49.370611348Z",
+          "created_at": "2025-09-03T17:36:20.507671Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -57,7 +57,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-15T20:24:49.557000029Z",
+          "created_at": "2025-09-03T17:36:20.549443Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -75,7 +75,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-15T20:24:49.746777116Z",
+          "created_at": "2025-09-03T17:36:20.590803Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -93,7 +93,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-15T20:24:49.942233333Z",
+          "created_at": "2025-09-03T17:36:20.631683Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -111,7 +111,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-15T20:24:50.126788846Z",
+          "created_at": "2025-09-03T17:36:20.672443Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -129,7 +129,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-15T20:24:50.311346131Z",
+          "created_at": "2025-09-03T17:36:20.713329Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -147,7 +147,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-15T20:24:50.501507173Z",
+          "created_at": "2025-09-03T17:36:20.754254Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -165,7 +165,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-15T20:24:50.692296777Z",
+          "created_at": "2025-09-03T17:36:20.795119Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -183,7 +183,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-15T20:24:50.878846539Z",
+          "created_at": "2025-09-03T17:36:20.836145Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -201,15 +201,15 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-15T20:24:51.063200561Z",
+          "created_at": "2025-09-03T17:36:20.877784Z",
          "done": true,
          "done_reason": "stop",
-          "total_duration": 33982453650,
-          "load_duration": 2909001805,
+          "total_duration": 612057417,
+          "load_duration": 97443583,
          "prompt_eval_count": 341,
-          "prompt_eval_duration": 29194357307,
+          "prompt_eval_duration": 100914750,
          "eval_count": 11,
-          "eval_duration": 1878247732,
+          "eval_duration": 413024250,
          "response": "",
          "thinking": null,
          "context": null
--- a/tests/integration/recordings/responses/563b994bb7d1.json
+++ b/tests/integration/recordings/responses/563b994bb7d1.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-08-04T22:55:13.25248Z",
+        "created_at": "2025-09-03T17:36:19.594923Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 1344654917,
-        "load_duration": 200585375,
+        "total_duration": 988472417,
+        "load_duration": 117976625,
        "prompt_eval_count": 326,
-        "prompt_eval_duration": 564000000,
+        "prompt_eval_duration": 451625542,
        "eval_count": 11,
-        "eval_duration": 578000000,
+        "eval_duration": 418313417,
        "response": "[get_weather(location=\"San Francisco, CA\")]",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/5f5d16afadb4.json
+++ b/tests/integration/recordings/responses/5f5d16afadb4.json
@ -21,7 +21,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:13.354888Z",
+          "created_at": "2025-09-03T17:36:19.808372Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -39,7 +39,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:13.427569Z",
+          "created_at": "2025-09-03T17:36:19.84991Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -57,7 +57,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:13.486244Z",
+          "created_at": "2025-09-03T17:36:19.892111Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -75,7 +75,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:13.540455Z",
+          "created_at": "2025-09-03T17:36:19.933857Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -93,7 +93,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:13.594439Z",
+          "created_at": "2025-09-03T17:36:19.975148Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -111,7 +111,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:13.649837Z",
+          "created_at": "2025-09-03T17:36:20.016641Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -129,7 +129,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:13.703358Z",
+          "created_at": "2025-09-03T17:36:20.058229Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -147,7 +147,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:13.7553Z",
+          "created_at": "2025-09-03T17:36:20.100222Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -165,7 +165,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:13.807251Z",
+          "created_at": "2025-09-03T17:36:20.143456Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -183,7 +183,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:13.857952Z",
+          "created_at": "2025-09-03T17:36:20.184657Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -201,15 +201,15 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:13.918522Z",
+          "created_at": "2025-09-03T17:36:20.226017Z",
          "done": true,
          "done_reason": "stop",
-          "total_duration": 647785042,
-          "load_duration": 26355584,
+          "total_duration": 598395375,
+          "load_duration": 129432167,
          "prompt_eval_count": 326,
-          "prompt_eval_duration": 55000000,
+          "prompt_eval_duration": 50057334,
          "eval_count": 11,
-          "eval_duration": 557000000,
+          "eval_duration": 418284791,
          "response": "",
          "thinking": null,
          "context": null
--- a/tests/integration/recordings/responses/62aa454ea5f9.json
+++ b/tests/integration/recordings/responses/62aa454ea5f9.json
@ -0,0 +1,422 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
+    "headers": {},
+    "body": {
+      "model": "all-minilm:l6-v2",
+      "input": [
+        "What inspires neural networks?"
+      ],
+      "encoding_format": "float"
+    },
+    "endpoint": "/v1/embeddings",
+    "model": "all-minilm:l6-v2"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
+      "__data__": {
+        "data": [
+          {
+            "embedding": [
+              -0.08566708,
+              -0.09559047,
+              0.044014607,
+              -0.015974598,
+              0.029406257,
+              0.07229597,
+              -0.010901963,
+              -0.023829829,
+              0.07381301,
+              -0.05698464,
+              -0.033780586,
+              0.051200844,
+              0.0050912783,
+              0.014317088,
+              -0.07878143,
+              -0.012908666,
+              -0.041628323,
+              0.06881713,
+              -0.10783476,
+              -0.04042705,
+              0.026262026,
+              -0.0019893218,
+              -0.011008084,
+              -0.0019646112,
+              0.004033132,
+              0.08881656,
+              0.014049165,
+              -0.018416086,
+              0.032621212,
+              -0.034692146,
+              0.07614942,
+              -0.014122101,
+              -0.024901746,
+              0.03755059,
+              -0.10197354,
+              0.054705318,
+              -0.022539826,
+              0.024209768,
+              0.011698194,
+              -0.008956377,
+              -0.050146304,
+              0.0026327297,
+              0.055942897,
+              0.009974366,
+              0.12796965,
+              -0.025006283,
+              0.024338534,
+              -0.024487961,
+              -0.0022703854,
+              -0.024687177,
+              -0.10482094,
+              -0.05994297,
+              -0.055200897,
+              0.0152664175,
+              0.03496896,
+              0.052624088,
+              -0.0006445885,
+              0.06637695,
+              -0.031790398,
+              -0.007308742,
+              -0.0050764186,
+              -0.042508755,
+              -0.04089097,
+              0.020062948,
+              0.038683955,
+              0.022463562,
+              -0.02866933,
+              0.053370677,
+              0.022435635,
+              0.01934692,
+              0.12264713,
+              0.023911418,
+              -0.037264284,
+              0.0059156846,
+              0.05235448,
+              0.054004095,
+              0.08022169,
+              -0.010992806,
+              0.029295033,
+              -0.0672064,
+              -0.00021147476,
+              -0.050584126,
+              -0.0095251575,
+              0.04616498,
+              0.078677796,
+              0.01416309,
+              -0.033226117,
+              0.0018380182,
+              -0.06667651,
+              -0.020977372,
+              -0.017116925,
+              -0.04396714,
+              -0.05969979,
+              -0.07344942,
+              -0.03985366,
+              -0.030863814,
+              -0.019918729,
+              -0.1075161,
+              -0.026654154,
+              0.0689854,
+              -0.0049292273,
+              0.026645623,
+              0.018879393,
+              0.022113768,
+              0.064208575,
+              -0.053153764,
+              0.06160797,
+              0.014026719,
+              0.11772326,
+              -0.051769163,
+              -0.07634968,
+              0.03090975,
+              -0.038558383,
+              -0.025260162,
+              0.039262023,
+              -0.061449137,
+              0.008389126,
+              0.016175874,
+              0.032293033,
+              0.06679397,
+              -0.06503257,
+              0.014676881,
+              -0.038542666,
+              0.018718671,
+              -0.030111106,
+              -0.028481327,
+              -0.14707623,
+              -3.455443e-33,
+              -0.048577547,
+              -0.024983348,
+              0.071679614,
+              0.035652317,
+              0.07931413,
+              -0.07811974,
+              0.023085583,
+              -0.047467884,
+              0.08872273,
+              -0.0010074769,
+              -0.11320135,
+              0.091322996,
+              0.023978539,
+              0.11368158,
+              0.042203873,
+              -0.05773289,
+              -0.074543044,
+              -0.0021036167,
+              -0.051522236,
+              -0.050925426,
+              -0.0016557347,
+              0.030671587,
+              0.045119714,
+              -0.03974729,
+              -0.05871358,
+              -0.030611658,
+              0.0017253247,
+              0.009114429,
+              -0.013763352,
+              0.023424039,
+              0.0017495834,
+              0.046633217,
+              -0.07230643,
+              -0.027882291,
+              0.016182518,
+              0.044456217,
+              -0.004326421,
+              -0.061798126,
+              0.0697968,
+              0.031249145,
+              -0.013697079,
+              -0.007417679,
+              0.031665757,
+              -0.02367961,
+              0.07153089,
+              0.023938214,
+              0.009729952,
+              0.0071919435,
+              -0.03235391,
+              -0.04955071,
+              -0.050248373,
+              0.02151118,
+              0.015327139,
+              -0.0674203,
+              0.06544387,
+              -0.025547959,
+              0.03207046,
+              0.02038825,
+              0.0112230005,
+              0.00019493286,
+              -0.023462659,
+              -0.004949742,
+              -0.014066955,
+              0.0014178518,
+              0.059315395,
+              0.039931085,
+              -0.032498423,
+              -0.023698896,
+              0.05445033,
+              0.064231694,
+              -0.034013335,
+              0.08745776,
+              -0.080473825,
+              -0.090545714,
+              -0.065398656,
+              -8.2386265e-05,
+              -0.021441188,
+              -0.0684535,
+              -0.029121745,
+              0.034134887,
+              -0.07799698,
+              -0.05388711,
+              -0.035591345,
+              0.044826802,
+              -0.040090464,
+              0.07972004,
+              0.026058797,
+              -0.08184859,
+              0.0018106091,
+              -0.027676936,
+              -0.04312832,
+              -0.042090744,
+              0.08336437,
+              -0.049453646,
+              -0.0902778,
+              2.6716498e-33,
+              -0.091911495,
+              0.02641473,
+              -0.07022486,
+              0.075562105,
+              0.03900905,
+              0.027913846,
+              -0.05444872,
+              -0.036666486,
+              -0.048225258,
+              0.07551892,
+              0.046452336,
+              0.025874302,
+              0.052248206,
+              -0.00018527219,
+              0.010575236,
+              -0.040591337,
+              -0.028484622,
+              -0.020559357,
+              0.08882296,
+              -0.06755767,
+              0.04941752,
+              0.13231009,
+              -0.06998129,
+              -0.040112328,
+              0.044030365,
+              0.034218542,
+              -0.08650528,
+              0.05746921,
+              -0.0075130556,
+              0.049070083,
+              -0.0148686,
+              -0.018103259,
+              -0.020280316,
+              0.038828347,
+              0.022253176,
+              0.13486238,
+              0.06899369,
+              -0.002589861,
+              -0.016430879,
+              0.0033818923,
+              0.017275693,
+              0.013614936,
+              0.044220798,
+              0.049155377,
+              -0.008259856,
+              -0.046575654,
+              -0.043921605,
+              0.04156687,
+              -0.035468902,
+              0.042837795,
+              0.03131579,
+              0.017961076,
+              -0.026213305,
+              -0.05458616,
+              -0.04259084,
+              -0.004110002,
+              0.029035388,
+              0.0010451805,
+              0.09044077,
+              0.014110149,
+              -0.068820216,
+              -0.07098938,
+              0.020328037,
+              0.00433692,
+              -0.046977337,
+              0.016492791,
+              -0.028396707,
+              0.104340956,
+              0.002814702,
+              -0.08339559,
+              0.037326302,
+              0.058929898,
+              0.0376423,
+              0.09580634,
+              -0.12376848,
+              -0.054060236,
+              -0.014485116,
+              0.0013106487,
+              -0.04537336,
+              -0.0899294,
+              0.001730278,
+              -0.05520831,
+              0.000568523,
+              0.00053380145,
+              0.07856981,
+              0.104590714,
+              0.00355283,
+              0.008365939,
+              0.04291482,
+              0.010064388,
+              0.025177509,
+              0.05732803,
+              -0.023061136,
+              0.054399785,
+              -0.049828697,
+              -1.3290186e-08,
+              -0.0539168,
+              0.08074109,
+              0.03397028,
+              0.024365881,
+              0.0906225,
+              -0.07162824,
+              0.07550329,
+              0.017278913,
+              -0.061226364,
+              -0.03298407,
+              0.07829606,
+              0.03967995,
+              -0.036696997,
+              0.02665964,
+              0.1000655,
+              -0.014426734,
+              0.020708792,
+              -0.039230846,
+              0.0085029,
+              -0.0012509917,
+              0.06740856,
+              0.013992665,
+              -0.054007422,
+              -0.016785627,
+              0.07651403,
+              -0.035508703,
+              -0.050085396,
+              0.08382383,
+              -0.009957674,
+              0.08140875,
+              0.019287178,
+              0.049911316,
+              0.0022236605,
+              -0.07807412,
+              0.019454133,
+              0.111560374,
+              -0.01269702,
+              -0.06466137,
+              -0.09346588,
+              -0.050038446,
+              -0.042178612,
+              0.0599713,
+              0.034831088,
+              -0.014957726,
+              0.014484159,
+              -0.022619838,
+              0.06916277,
+              -0.088544875,
+              0.021478733,
+              0.01378541,
+              -0.0075770007,
+              0.027888266,
+              0.015526889,
+              0.0052174823,
+              0.010616002,
+              -0.022908956,
+              -0.02535865,
+              -0.04139556,
+              -0.08375561,
+              0.092626974,
+              0.051755503,
+              0.09296614,
+              0.011223383,
+              -0.016759252
+            ],
+            "index": 0,
+            "object": "embedding"
+          }
+        ],
+        "model": "all-minilm:l6-v2",
+        "object": "list",
+        "usage": {
+          "prompt_tokens": 6,
+          "total_tokens": 6
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/recordings/responses/6906a6e71988.json
+++ b/tests/integration/recordings/responses/6906a6e71988.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama-guard3:1b",
-        "created_at": "2025-08-01T23:14:18.886381Z",
+        "created_at": "2025-09-03T17:38:00.98692Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 488566500,
-        "load_duration": 113477291,
+        "total_duration": 332473583,
+        "load_duration": 90611333,
        "prompt_eval_count": 317,
-        "prompt_eval_duration": 361000000,
+        "prompt_eval_duration": 229691000,
        "eval_count": 2,
-        "eval_duration": 12000000,
+        "eval_duration": 11571291,
        "response": "safe",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/6cc063bbd7d3.json
+++ b/tests/integration/recordings/responses/6cc063bbd7d3.json
@ -21,7 +21,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:55.9885Z",
+          "created_at": "2025-09-03T17:42:17.402486Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -39,7 +39,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.054143Z",
+          "created_at": "2025-09-03T17:42:17.444334Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -57,7 +57,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.117658Z",
+          "created_at": "2025-09-03T17:42:17.484625Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -75,7 +75,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.179422Z",
+          "created_at": "2025-09-03T17:42:17.525063Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -93,7 +93,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.240328Z",
+          "created_at": "2025-09-03T17:42:17.565015Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -111,7 +111,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.295992Z",
+          "created_at": "2025-09-03T17:42:17.60499Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -129,7 +129,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.355683Z",
+          "created_at": "2025-09-03T17:42:17.64509Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -147,7 +147,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.412176Z",
+          "created_at": "2025-09-03T17:42:17.685566Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -165,7 +165,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.466952Z",
+          "created_at": "2025-09-03T17:42:17.725855Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -183,7 +183,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.517222Z",
+          "created_at": "2025-09-03T17:42:17.766056Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -201,7 +201,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.570491Z",
+          "created_at": "2025-09-03T17:42:17.806415Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -219,7 +219,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.623189Z",
+          "created_at": "2025-09-03T17:42:17.847273Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -237,7 +237,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.679221Z",
+          "created_at": "2025-09-03T17:42:17.888576Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -255,7 +255,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.731373Z",
+          "created_at": "2025-09-03T17:42:17.928952Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -273,7 +273,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.781364Z",
+          "created_at": "2025-09-03T17:42:17.969744Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -291,7 +291,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.831951Z",
+          "created_at": "2025-09-03T17:42:18.010869Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -309,7 +309,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.888381Z",
+          "created_at": "2025-09-03T17:42:18.051109Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -327,7 +327,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.943539Z",
+          "created_at": "2025-09-03T17:42:18.093266Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -345,7 +345,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:56.997422Z",
+          "created_at": "2025-09-03T17:42:18.135749Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -363,15 +363,15 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-04T22:55:57.056259Z",
+          "created_at": "2025-09-03T17:42:18.176649Z",
          "done": true,
          "done_reason": "stop",
-          "total_duration": 1289815458,
-          "load_duration": 119745583,
+          "total_duration": 907420000,
+          "load_duration": 66756750,
          "prompt_eval_count": 26,
-          "prompt_eval_duration": 98000000,
+          "prompt_eval_duration": 62900875,
          "eval_count": 20,
-          "eval_duration": 1071000000,
+          "eval_duration": 777306958,
          "response": "",
          "thinking": null,
          "context": null
--- a/tests/integration/recordings/responses/6d35c91287e2.json
+++ b/tests/integration/recordings/responses/6d35c91287e2.json
@ -22,7 +22,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:22.362667Z",
+          "created_at": "2025-09-03T17:38:03.549266Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -40,7 +40,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:22.427435Z",
+          "created_at": "2025-09-03T17:38:03.592203Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -58,7 +58,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:22.484198Z",
+          "created_at": "2025-09-03T17:38:03.63417Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -76,7 +76,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:22.537031Z",
+          "created_at": "2025-09-03T17:38:03.677268Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -94,7 +94,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:22.591198Z",
+          "created_at": "2025-09-03T17:38:03.719768Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -112,7 +112,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:22.643336Z",
+          "created_at": "2025-09-03T17:38:03.762204Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -130,7 +130,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:22.698589Z",
+          "created_at": "2025-09-03T17:38:03.80404Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -148,7 +148,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:22.752904Z",
+          "created_at": "2025-09-03T17:38:03.845678Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -166,7 +166,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:22.804Z",
+          "created_at": "2025-09-03T17:38:03.887086Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -184,7 +184,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:22.855633Z",
+          "created_at": "2025-09-03T17:38:03.928422Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -202,7 +202,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:22.906918Z",
+          "created_at": "2025-09-03T17:38:03.969641Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -220,7 +220,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:22.958729Z",
+          "created_at": "2025-09-03T17:38:04.011212Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -238,15 +238,15 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:23.011279Z",
+          "created_at": "2025-09-03T17:38:04.052626Z",
          "done": true,
          "done_reason": "stop",
-          "total_duration": 793500292,
-          "load_duration": 55339750,
+          "total_duration": 731936583,
+          "load_duration": 147334791,
          "prompt_eval_count": 417,
-          "prompt_eval_duration": 83000000,
+          "prompt_eval_duration": 79443792,
          "eval_count": 13,
-          "eval_duration": 653000000,
+          "eval_duration": 504352750,
          "response": "",
          "thinking": null,
          "context": null
--- a/tests/integration/recordings/responses/6f96090aa955.json
+++ b/tests/integration/recordings/responses/6f96090aa955.json
@ -1,7 +1,7 @@
 {
  "request": {
    "method": "POST",
-    "url": "http://localhost:11434/v1/v1/chat/completions",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
@ -21,7 +21,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -36,7 +36,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081849,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -47,7 +47,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -62,7 +62,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081849,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -73,11 +73,11 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
-                "content": " Welcome",
+                "content": " It",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
@ -88,7 +88,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081849,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -99,7 +99,59 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
+          "choices": [
+            {
+              "delta": {
+                "content": "'s",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 1756921359,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-698",
+          "choices": [
+            {
+              "delta": {
+                "content": " nice",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 1756921359,
+          "model": "llama3.2:3b-instruct-fp16",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": "fp_ollama",
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -114,7 +166,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081849,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -125,11 +177,11 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
-                "content": " our",
+                "content": " meet",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
@ -140,7 +192,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081849,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -151,11 +203,11 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
-                "content": " conversation",
+                "content": " you",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
@ -166,7 +218,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081849,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -177,7 +229,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -192,7 +244,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081849,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -203,7 +255,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -218,7 +270,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -229,7 +281,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -244,7 +296,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -255,7 +307,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -270,7 +322,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -281,7 +333,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -296,7 +348,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -307,7 +359,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -322,7 +374,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -333,7 +385,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -348,7 +400,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -359,7 +411,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -374,7 +426,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -385,7 +437,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -400,7 +452,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -411,33 +463,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
-          "choices": [
-            {
-              "delta": {
-                "content": ",",
-                "function_call": null,
-                "refusal": null,
-                "role": "assistant",
-                "tool_calls": null
-              },
-              "finish_reason": null,
-              "index": 0,
-              "logprobs": null
-            }
-          ],
-          "created": 1754081850,
-          "model": "llama3.2:3b-instruct-fp16",
-          "object": "chat.completion.chunk",
-          "service_tier": null,
-          "system_fingerprint": "fp_ollama",
-          "usage": null
-        }
-      },
-      {
-        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
-        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -452,7 +478,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -463,7 +489,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -478,7 +504,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921359,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -489,7 +515,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -504,7 +530,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921360,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -515,7 +541,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -530,7 +556,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921360,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -541,7 +567,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -556,7 +582,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921360,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -567,7 +593,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -582,7 +608,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921360,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -593,7 +619,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -608,7 +634,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921360,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
@ -619,7 +645,7 @@
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
-          "id": "chatcmpl-333",
+          "id": "chatcmpl-698",
          "choices": [
            {
              "delta": {
@ -634,7 +660,7 @@
              "logprobs": null
            }
          ],
-          "created": 1754081850,
+          "created": 1756921360,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
--- a/tests/integration/recordings/responses/6fbea1abca7c.json
+++ b/tests/integration/recordings/responses/6fbea1abca7c.json
@ -22,7 +22,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.337763Z",
+          "created_at": "2025-09-03T17:38:01.89965Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -40,7 +40,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.394358Z",
+          "created_at": "2025-09-03T17:38:01.941253Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -58,7 +58,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.451349Z",
+          "created_at": "2025-09-03T17:38:01.982621Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -76,7 +76,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.504443Z",
+          "created_at": "2025-09-03T17:38:02.024144Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -94,7 +94,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.555779Z",
+          "created_at": "2025-09-03T17:38:02.065495Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -112,7 +112,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.607807Z",
+          "created_at": "2025-09-03T17:38:02.107529Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -130,7 +130,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.660627Z",
+          "created_at": "2025-09-03T17:38:02.149217Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -148,7 +148,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.711562Z",
+          "created_at": "2025-09-03T17:38:02.190357Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -166,7 +166,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.761822Z",
+          "created_at": "2025-09-03T17:38:02.231501Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -184,7 +184,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.81712Z",
+          "created_at": "2025-09-03T17:38:02.272546Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -202,7 +202,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.868755Z",
+          "created_at": "2025-09-03T17:38:02.313561Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -220,7 +220,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.921049Z",
+          "created_at": "2025-09-03T17:38:02.354563Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -238,7 +238,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:20.973584Z",
+          "created_at": "2025-09-03T17:38:02.395585Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -256,7 +256,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:21.030707Z",
+          "created_at": "2025-09-03T17:38:02.436854Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -274,7 +274,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:21.082015Z",
+          "created_at": "2025-09-03T17:38:02.47814Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -292,7 +292,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:21.132945Z",
+          "created_at": "2025-09-03T17:38:02.519661Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -310,7 +310,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:21.187452Z",
+          "created_at": "2025-09-03T17:38:02.561119Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -328,7 +328,7 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:21.239827Z",
+          "created_at": "2025-09-03T17:38:02.602821Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
@ -346,15 +346,15 @@
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
-          "created_at": "2025-08-01T23:14:21.294154Z",
+          "created_at": "2025-09-03T17:38:02.644633Z",
          "done": true,
          "done_reason": "stop",
-          "total_duration": 1929211666,
-          "load_duration": 61298666,
+          "total_duration": 1375629459,
+          "load_duration": 94090250,
          "prompt_eval_count": 386,
-          "prompt_eval_duration": 908000000,
+          "prompt_eval_duration": 535119167,
          "eval_count": 19,
-          "eval_duration": 959000000,
+          "eval_duration": 745684041,
          "response": "",
          "thinking": null,
          "context": null
--- a/tests/integration/recordings/responses/6fe1d4fedf12.json
+++ b/tests/integration/recordings/responses/6fe1d4fedf12.json
--- a/tests/integration/recordings/responses/70adef2c30c4.json
+++ b/tests/integration/recordings/responses/70adef2c30c4.json
@ -20,15 +20,15 @@
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-08-04T22:55:55.720345Z",
+        "created_at": "2025-09-03T17:42:17.227488Z",
        "done": true,
        "done_reason": "stop",
-        "total_duration": 3865701084,
-        "load_duration": 52435459,
+        "total_duration": 3003964916,
+        "load_duration": 111221916,
        "prompt_eval_count": 30,
-        "prompt_eval_duration": 99000000,
+        "prompt_eval_duration": 72578583,
        "eval_count": 70,
-        "eval_duration": 3712000000,
+        "eval_duration": 2819555375,
        "response": "The answer is Saturn! Saturn's ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice particles, rock debris, and dust that orbit around the planet due to its gravitational pull.\n\nWould you like to know more about Saturn's rings or is there something else I can help you with?",
        "thinking": null,
        "context": null
--- a/tests/integration/recordings/responses/72c1126ff2f9.json
+++ b/tests/integration/recordings/responses/72c1126ff2f9.json
@ -0,0 +1,422 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/embeddings",
+    "headers": {},
+    "body": {
+      "model": "all-minilm:l6-v2",
+      "input": [
+        "artificial intelligence"
+      ],
+      "encoding_format": "float"
+    },
+    "endpoint": "/v1/embeddings",
+    "model": "all-minilm:l6-v2"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
+      "__data__": {
+        "data": [
+          {
+            "embedding": [
+              -0.024330618,
+              0.016706783,
+              0.037677176,
+              -0.00915746,
+              -0.030534461,
+              -0.017140884,
+              0.074272,
+              0.0456916,
+              -0.009377196,
+              0.009883053,
+              -0.0056895507,
+              0.007668296,
+              0.039537333,
+              0.015226257,
+              -0.083189555,
+              0.019439526,
+              -0.022046678,
+              -0.033254813,
+              -0.18105465,
+              -0.13025087,
+              -0.0022671346,
+              0.013451522,
+              -0.024325468,
+              -0.0370128,
+              0.0020083552,
+              0.08566712,
+              0.0047639925,
+              -0.0033431018,
+              -0.006082307,
+              -0.11575565,
+              0.06682902,
+              -0.018777572,
+              0.08786827,
+              -0.0074177794,
+              -0.093573004,
+              0.06146399,
+              -0.08110609,
+              0.012222862,
+              0.03971064,
+              -0.0026197461,
+              -0.04657111,
+              -0.08183902,
+              0.03959615,
+              0.015451151,
+              0.04370617,
+              0.103643835,
+              -0.058421485,
+              0.036699355,
+              -0.052699573,
+              0.040590122,
+              -0.12578927,
+              0.006500531,
+              -0.03583627,
+              -0.010050973,
+              -0.023851713,
+              0.045972254,
+              0.014605586,
+              0.019414552,
+              0.028465148,
+              -0.055030964,
+              0.024210233,
+              -0.052867457,
+              0.015230711,
+              -0.0043921247,
+              0.092372045,
+              0.033849865,
+              -0.04737281,
+              0.03204496,
+              0.001322036,
+              -0.051211488,
+              0.025862284,
+              0.08155327,
+              0.04092595,
+              0.019154705,
+              0.056453932,
+              -0.052758913,
+              0.030533386,
+              -0.01663434,
+              0.07877244,
+              -0.054262977,
+              -0.042149354,
+              -0.045443602,
+              -0.052689902,
+              0.11225497,
+              0.01989102,
+              -0.042375352,
+              -0.01168115,
+              0.024315914,
+              0.01915792,
+              -0.016550383,
+              -0.01030883,
+              -0.08545277,
+              0.023834355,
+              -0.042181373,
+              -0.02503509,
+              0.062114798,
+              -0.0045557353,
+              -0.15369569,
+              0.001106691,
+              0.19423288,
+              -0.0338511,
+              0.026152972,
+              -0.02032091,
+              0.0012884078,
+              -0.0010269672,
+              -0.02411262,
+              0.017495485,
+              -0.009808713,
+              0.07037937,
+              -0.13769862,
+              -0.11118059,
+              -0.01736481,
+              0.06603106,
+              -0.05188892,
+              0.0019610007,
+              0.014606686,
+              0.060775463,
+              0.096280165,
+              0.013551965,
+              0.019343173,
+              -0.00010512453,
+              -0.026652312,
+              -0.009341819,
+              0.07083247,
+              -0.0034617546,
+              -0.062412772,
+              -0.044611085,
+              -8.796679e-34,
+              -0.111884,
+              -0.04256611,
+              0.027425196,
+              0.06574074,
+              0.002830377,
+              -0.044104468,
+              0.005238822,
+              -0.036899913,
+              -0.015583552,
+              0.0206543,
+              -0.059225976,
+              0.007236511,
+              -0.028716031,
+              0.040467348,
+              0.13387093,
+              0.006795838,
+              -0.01636956,
+              0.082198486,
+              -0.02261007,
+              -0.03641293,
+              0.06524453,
+              0.021011814,
+              -0.005472363,
+              -0.038433436,
+              0.001462021,
+              0.0073671984,
+              0.016773427,
+              -0.062663026,
+              0.035388503,
+              -0.014395795,
+              0.027888605,
+              0.0837546,
+              -0.027772024,
+              -0.0036210797,
+              0.03903557,
+              -0.026879627,
+              -0.018737236,
+              0.019059159,
+              0.06522148,
+              0.0070414003,
+              0.004749159,
+              -0.0030224407,
+              0.040062208,
+              0.028016094,
+              -0.004660955,
+              0.012264517,
+              0.08708117,
+              -0.0070171114,
+              -0.03749808,
+              0.011326775,
+              0.015419708,
+              0.013775354,
+              0.017958472,
+              -0.009817919,
+              0.09011542,
+              0.05170552,
+              -0.034259036,
+              0.0043903207,
+              -0.01884889,
+              -0.031481344,
+              0.08216297,
+              0.016875258,
+              -0.022163702,
+              0.06844141,
+              0.01581623,
+              0.020322658,
+              0.0063856863,
+              0.016461994,
+              0.12718283,
+              0.014996434,
+              -0.010813858,
+              0.0017669421,
+              0.03166716,
+              -0.044353984,
+              -0.05225622,
+              0.022843942,
+              0.050988898,
+              -0.018916955,
+              0.0027930918,
+              -0.033645593,
+              -0.13571611,
+              -0.027015164,
+              -0.035672266,
+              -0.033537813,
+              0.047864296,
+              -0.0054381513,
+              0.021346755,
+              -0.040034927,
+              0.019374551,
+              0.012011466,
+              -0.04336231,
+              0.00054701004,
+              0.034879614,
+              0.017960642,
+              -0.062501945,
+              8.224154e-34,
+              -0.09450138,
+              0.013776636,
+              -0.025351105,
+              0.098992504,
+              0.045503527,
+              -0.02053458,
+              -0.029694881,
+              -0.059200566,
+              0.042453792,
+              0.0844487,
+              -0.043211546,
+              -0.0077362363,
+              0.049354795,
+              0.04203366,
+              -0.036539596,
+              0.014424774,
+              0.040357023,
+              -0.058971472,
+              0.010022987,
+              0.059877146,
+              -0.02790864,
+              0.034927685,
+              -0.087597504,
+              -0.060616262,
+              -0.0048867166,
+              0.08776906,
+              -0.0053599468,
+              -0.021816833,
+              -0.048162397,
+              0.046919785,
+              0.0083988905,
+              -0.0517289,
+              -0.020422187,
+              0.08581073,
+              -0.022597926,
+              0.034425046,
+              -0.014506674,
+              0.0031332907,
+              -0.04651877,
+              0.030281488,
+              0.039713897,
+              0.02969227,
+              -0.09310218,
+              0.051527865,
+              0.007809,
+              -0.05700871,
+              -0.041792583,
+              0.08987064,
+              -0.00813404,
+              -0.04082285,
+              -0.053487595,
+              -0.034378976,
+              -0.045253906,
+              -0.09715307,
+              -0.058194414,
+              0.06093547,
+              -0.009079956,
+              0.006918499,
+              0.012345728,
+              0.062036473,
+              -0.0060238577,
+              -0.0864295,
+              0.05872831,
+              0.053304974,
+              -0.05352623,
+              0.039521407,
+              -0.04498403,
+              0.0727911,
+              -0.039616212,
+              -0.05134442,
+              0.10334881,
+              0.02176773,
+              0.00016648973,
+              0.009423309,
+              0.022016358,
+              -0.006902813,
+              -0.128883,
+              -0.009864072,
+              -0.036396757,
+              -0.042481646,
+              0.004420737,
+              -0.047660243,
+              0.0065179355,
+              0.102602735,
+              -0.053166825,
+              0.07328581,
+              0.015810944,
+              -0.029149039,
+              0.025130944,
+              -0.063055776,
+              -0.043462534,
+              0.06719971,
+              0.014921177,
+              -0.0010985207,
+              -0.09869465,
+              -1.4682753e-08,
+              0.004611013,
+              -0.06715223,
+              0.07644809,
+              -0.019802453,
+              0.06737909,
+              0.044783685,
+              -0.050963327,
+              -0.0077186874,
+              -0.029319718,
+              0.028867716,
+              0.018877175,
+              -0.024279349,
+              0.04412064,
+              0.04416273,
+              0.03432814,
+              0.046517964,
+              0.02158077,
+              -0.001748483,
+              -0.0029956794,
+              0.014355785,
+              0.12525895,
+              0.03431845,
+              -0.014617591,
+              0.039184693,
+              -0.0023036227,
+              -0.014352919,
+              0.01010173,
+              0.02430961,
+              -0.041730728,
+              0.08832413,
+              -0.031459343,
+              0.030073628,
+              -0.0029376182,
+              0.0049478672,
+              0.09588392,
+              0.09396655,
+              0.01412568,
+              -0.077148266,
+              -0.039246846,
+              -0.01064901,
+              -0.008556093,
+              0.06409403,
+              -0.033037152,
+              -0.03049978,
+              0.0945846,
+              -0.008954658,
+              -0.029921891,
+              -0.132985,
+              0.059934624,
+              -0.011668423,
+              0.0071737366,
+              0.035627652,
+              0.0041028745,
+              0.056198087,
+              0.07656151,
+              -0.010067092,
+              0.05678312,
+              0.023536043,
+              -0.063770495,
+              0.08934554,
+              0.043756966,
+              0.04337246,
+              0.046287052,
+              -0.07039028
+            ],
+            "index": 0,
+            "object": "embedding"
+          }
+        ],
+        "model": "all-minilm:l6-v2",
+        "object": "list",
+        "usage": {
+          "prompt_tokens": 2,
+          "total_tokens": 2
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/Show more
+++ b/Show more