Merge branch 'main' into fix/issue-4005-installer-macos-permissions

Roy Belio authored 2025-11-06 20:31:28 +02:00, committed by GitHub
commit 03d85ae04a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
599 changed files with 9777 additions and 370163 deletions


@@ -72,7 +72,8 @@ runs:
echo "New recordings detected, committing and pushing"
git add tests/integration/
git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
git fetch origin ${{ github.ref_name }}
git rebase origin/${{ github.ref_name }}
echo "Rebased successfully"
@@ -88,6 +89,8 @@ runs:
run: |
# Ollama logs (if ollama container exists)
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
# vllm logs (if vllm container exists)
sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
# Note: distro container logs are now dumped in integration-tests.sh before container is removed
- name: Upload logs


@@ -11,13 +11,14 @@ runs:
--name vllm \
-p 8000:8000 \
--privileged=true \
quay.io/higginsd/vllm-cpu:65393ee064 \
quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
--host 0.0.0.0 \
--port 8000 \
--enable-auto-tool-choice \
--tool-call-parser llama3_json \
--model /root/.cache/Llama-3.2-1B-Instruct \
--served-model-name meta-llama/Llama-3.2-1B-Instruct
--tool-call-parser hermes \
--model /root/.cache/Qwen3-0.6B \
--served-model-name Qwen/Qwen3-0.6B \
--max-model-len 8192
# Wait for vllm to be ready
echo "Waiting for vllm to be ready..."


@@ -23,10 +23,10 @@ on:
- '.github/actions/setup-test-environment/action.yml'
- '.github/actions/run-and-record-tests/action.yml'
- 'scripts/integration-tests.sh'
- 'scripts/generate_ci_matrix.py'
schedule:
# If changing the cron schedule, update the provider in the test-matrix job
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
- cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 00:01 UTC
workflow_dispatch:
inputs:
test-all-client-versions:
@@ -44,8 +44,27 @@ concurrency:
cancel-in-progress: true
jobs:
generate-matrix:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: Checkout repository
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Generate test matrix
id: set-matrix
run: |
# Generate matrix from CI_MATRIX in tests/integration/suites.py
# Supports schedule-based and manual input overrides
MATRIX=$(PYTHONPATH=. python3 scripts/generate_ci_matrix.py \
--schedule "${{ github.event.schedule }}" \
--test-setup "${{ github.event.inputs.test-setup }}")
echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
echo "Generated matrix: $MATRIX"
run-replay-mode-tests:
needs: generate-matrix
runs-on: ubuntu-latest
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
@@ -56,18 +75,9 @@ jobs:
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
# Define (setup, suite) pairs - they are always matched and cannot be independent
# Weekly schedule (Sun 1 AM): vllm+base
# Input test-setup=ollama-vision: ollama-vision+vision
# Default (including test-setup=ollama): ollama+base, ollama-vision+vision, gpt+responses
config: >-
${{
github.event.schedule == '1 0 * * 0'
&& fromJSON('[{"setup": "vllm", "suite": "base"}]')
|| github.event.inputs.test-setup == 'ollama-vision'
&& fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
|| fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}, {"setup": "gpt", "suite": "responses"}]')
}}
# Test configurations: Generated from CI_MATRIX in tests/integration/suites.py
# See scripts/generate_ci_matrix.py for generation logic
config: ${{ fromJSON(needs.generate-matrix.outputs.matrix).include }}
steps:
- name: Checkout repository


@@ -165,3 +165,14 @@ jobs:
echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
fi
exit $status
- name: Check if any unused recordings
run: |
set -e
PYTHONPATH=$PWD uv run ./scripts/cleanup_recordings.py --delete
changes=$(git status --short tests/integration | grep 'recordings' || true)
if [ -n "$changes" ]; then
echo "::error::Unused integration recordings detected. Run 'PYTHONPATH=$(pwd) uv run ./scripts/cleanup_recordings.py --delete' locally and commit the deletions."
echo "$changes"
exit 1
fi


@@ -9976,6 +9976,70 @@ components:
- metadata
title: VectorStoreObject
description: OpenAI Vector Store object.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
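For reference, a client payload matching the schemas above; only the body shape comes from the spec, the store name is illustrative:

```python
# Static chunking: both fields are required by VectorStoreChunkingStrategyStaticConfig.
create_body = {
    "name": "my-store",  # illustrative
    "chunking_strategy": {
        "type": "static",
        "static": {
            "max_chunk_size_tokens": 800,  # must be 100..4096 per the schema
            "chunk_overlap_tokens": 400,
        },
    },
}

# Automatic chunking: the discriminator alone is enough.
auto_body = {"name": "my-store", "chunking_strategy": {"type": "auto"}}
```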
"OpenAICreateVectorStoreRequestWithExtraBody":
type: object
properties:
@@ -10001,15 +10065,7 @@ components:
description: >-
(Optional) Expiration policy for the vector store
chunking_strategy:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
$ref: '#/components/schemas/VectorStoreChunkingStrategy'
description: >-
(Optional) Strategy for splitting files into chunks
metadata:
@@ -10085,70 +10141,6 @@ components:
- deleted
title: VectorStoreDeleteResponse
description: Response from deleting a vector store.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
type: object
properties:
@@ -10606,7 +10598,9 @@ components:
description: >-
Object type identifier for the search results page
search_query:
type: string
type: array
items:
type: string
description: >-
The original search query that was executed
data:


@@ -47,7 +47,7 @@ RUN set -eux; \
exit 1; \
fi
RUN pip install --no-cache-dir uv
RUN pip install --no-cache uv
ENV UV_SYSTEM_PYTHON=1
ENV INSTALL_MODE=${INSTALL_MODE}
@@ -72,7 +72,7 @@ RUN set -eux; \
echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
exit 1; \
fi; \
uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
uv pip install --no-cache -e "$LLAMA_STACK_CLIENT_DIR"; \
fi;
# Install llama-stack
@@ -88,22 +88,22 @@ RUN set -eux; \
fi; \
if [ -n "$SAVED_UV_EXTRA_INDEX_URL" ] && [ -n "$SAVED_UV_INDEX_STRATEGY" ]; then \
UV_EXTRA_INDEX_URL="$SAVED_UV_EXTRA_INDEX_URL" UV_INDEX_STRATEGY="$SAVED_UV_INDEX_STRATEGY" \
uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
uv pip install --no-cache -e "$LLAMA_STACK_DIR"; \
else \
uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
uv pip install --no-cache -e "$LLAMA_STACK_DIR"; \
fi; \
elif [ "$INSTALL_MODE" = "test-pypi" ]; then \
uv pip install --no-cache-dir fastapi libcst; \
uv pip install --no-cache fastapi libcst; \
if [ -n "$TEST_PYPI_VERSION" ]; then \
uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
else \
uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
fi; \
else \
if [ -n "$PYPI_VERSION" ]; then \
uv pip install --no-cache-dir "llama-stack==$PYPI_VERSION"; \
uv pip install --no-cache "llama-stack==$PYPI_VERSION"; \
else \
uv pip install --no-cache-dir llama-stack; \
uv pip install --no-cache llama-stack; \
fi; \
fi;
@@ -117,7 +117,7 @@ RUN set -eux; \
fi; \
deps="$(llama stack list-deps "$DISTRO_NAME")"; \
if [ -n "$deps" ]; then \
printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache-dir; \
printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache; \
fi
# Cleanup


@@ -163,7 +163,41 @@ docker run \
--port $LLAMA_STACK_PORT
```
### Via venv
The container will run the distribution with a SQLite store by default. This store is used for the following components:
- Metadata store: store metadata about the models, providers, etc.
- Inference store: store responses from the inference provider
- Agents store: store agent configurations (sessions, turns, etc.)
- Agents Responses store: store responses from the agents
However, you can use PostgreSQL instead by running the `starter::run-with-postgres-store.yaml` configuration:
```bash
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-e OPENAI_API_KEY=your_openai_key \
-e FIREWORKS_API_KEY=your_fireworks_key \
-e TOGETHER_API_KEY=your_together_key \
-e POSTGRES_HOST=your_postgres_host \
-e POSTGRES_PORT=your_postgres_port \
-e POSTGRES_DB=your_postgres_db \
-e POSTGRES_USER=your_postgres_user \
-e POSTGRES_PASSWORD=your_postgres_password \
llamastack/distribution-starter \
starter::run-with-postgres-store.yaml
```
Postgres environment variables:
- `POSTGRES_HOST`: Postgres host (default: `localhost`)
- `POSTGRES_PORT`: Postgres port (default: `5432`)
- `POSTGRES_DB`: Postgres database name (default: `llamastack`)
- `POSTGRES_USER`: Postgres username (default: `llamastack`)
- `POSTGRES_PASSWORD`: Postgres password (default: `llamastack`)
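As a quick connectivity check for these variables before starting the server, a hedged sketch assuming `psycopg2-binary` is available (it is added to `additional_pip_packages` elsewhere in this commit):

```python
import os

import psycopg2  # provided by psycopg2-binary

conn = psycopg2.connect(
    host=os.environ.get("POSTGRES_HOST", "localhost"),
    port=int(os.environ.get("POSTGRES_PORT", "5432")),
    dbname=os.environ.get("POSTGRES_DB", "llamastack"),
    user=os.environ.get("POSTGRES_USER", "llamastack"),
    password=os.environ.get("POSTGRES_PASSWORD", "llamastack"),
)
print("connected, server version:", conn.server_version)
conn.close()
```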
### Via Conda or venv
Ensure you have configured the starter distribution using the environment variables explained above.
@@ -171,8 +205,11 @@ Ensure you have configured the starter distribution using the environment variab
# Install dependencies for the starter distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
# Run the server
# Run the server (with SQLite - default)
uv run --with llama-stack llama stack run starter
# Or run with PostgreSQL
uv run --with llama-stack llama stack run starter::run-with-postgres-store.yaml
```
## Example Usage


@@ -16,7 +16,7 @@ Passthrough inference provider for connecting to any external inference service
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint |
| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider |
| `url` | `<class 'str'>` | No | | The URL for the passthrough endpoint |
## Sample Configuration


@@ -9260,6 +9260,70 @@ components:
- metadata
title: VectorStoreObject
description: OpenAI Vector Store object.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreRequestWithExtraBody":
type: object
properties:
@@ -9285,15 +9349,7 @@ components:
description: >-
(Optional) Expiration policy for the vector store
chunking_strategy:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
$ref: '#/components/schemas/VectorStoreChunkingStrategy'
description: >-
(Optional) Strategy for splitting files into chunks
metadata:
@@ -9369,70 +9425,6 @@ components:
- deleted
title: VectorStoreDeleteResponse
description: Response from deleting a vector store.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
type: object
properties:
@@ -9890,7 +9882,9 @@ components:
description: >-
Object type identifier for the search results page
search_query:
type: string
type: array
items:
type: string
description: >-
The original search query that was executed
data:


@@ -9976,6 +9976,70 @@ components:
- metadata
title: VectorStoreObject
description: OpenAI Vector Store object.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreRequestWithExtraBody":
type: object
properties:
@@ -10001,15 +10065,7 @@ components:
description: >-
(Optional) Expiration policy for the vector store
chunking_strategy:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
$ref: '#/components/schemas/VectorStoreChunkingStrategy'
description: >-
(Optional) Strategy for splitting files into chunks
metadata:
@@ -10085,70 +10141,6 @@ components:
- deleted
title: VectorStoreDeleteResponse
description: Response from deleting a vector store.
VectorStoreChunkingStrategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
VectorStoreChunkingStrategyAuto:
type: object
properties:
type:
type: string
const: auto
default: auto
description: >-
Strategy type, always "auto" for automatic chunking
additionalProperties: false
required:
- type
title: VectorStoreChunkingStrategyAuto
description: >-
Automatic chunking strategy for vector store files.
VectorStoreChunkingStrategyStatic:
type: object
properties:
type:
type: string
const: static
default: static
description: >-
Strategy type, always "static" for static chunking
static:
$ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
description: >-
Configuration parameters for the static chunking strategy
additionalProperties: false
required:
- type
- static
title: VectorStoreChunkingStrategyStatic
description: >-
Static chunking strategy with configurable parameters.
VectorStoreChunkingStrategyStaticConfig:
type: object
properties:
chunk_overlap_tokens:
type: integer
default: 400
description: >-
Number of tokens to overlap between adjacent chunks
max_chunk_size_tokens:
type: integer
default: 800
description: >-
Maximum number of tokens per chunk, must be between 100 and 4096
additionalProperties: false
required:
- chunk_overlap_tokens
- max_chunk_size_tokens
title: VectorStoreChunkingStrategyStaticConfig
description: >-
Configuration for static chunking strategy.
"OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
type: object
properties:
@@ -10606,7 +10598,9 @@ components:
description: >-
Object type identifier for the search results page
search_query:
type: string
type: array
items:
type: string
description: >-
The original search query that was executed
data:

scripts/cleanup_recordings.py (new executable file, 272 lines)

@@ -0,0 +1,272 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Clean up unused test recordings based on CI test collection.
This script:
1. Reads CI matrix definitions from tests/integration/ci_matrix.json (default + scheduled overrides)
2. Uses pytest --collect-only with --json-report to gather all test IDs that run in CI
3. Compares against existing recordings to identify unused ones
4. Optionally deletes unused recordings
Usage:
# Dry run - see what would be deleted
./scripts/cleanup_recordings.py
# Save manifest of CI test IDs for inspection
./scripts/cleanup_recordings.py --manifest ci_tests.txt
# Actually delete unused recordings
./scripts/cleanup_recordings.py --delete
"""
import argparse
import json
import os
import subprocess
import tempfile
from collections import defaultdict
from pathlib import Path
REPO_ROOT = Path(__file__).parent.parent
# Load CI matrix from JSON file
CI_MATRIX_FILE = REPO_ROOT / "tests/integration/ci_matrix.json"
with open(CI_MATRIX_FILE) as f:
_matrix_config = json.load(f)
DEFAULT_CI_MATRIX: list[dict[str, str]] = _matrix_config["default"]
SCHEDULED_MATRICES: dict[str, list[dict[str, str]]] = _matrix_config.get("schedules", {})
def _unique_configs(entries):
seen: set[tuple[str, str]] = set()
for entry in entries:
suite = entry["suite"]
setup = entry["setup"]
key = (suite, setup)
if key in seen:
continue
seen.add(key)
yield {"suite": suite, "setup": setup}
def iter_all_ci_configs() -> list[dict[str, str]]:
"""Return unique CI configs across default and scheduled matrices."""
combined = list(DEFAULT_CI_MATRIX)
for configs in SCHEDULED_MATRICES.values():
combined.extend(configs)
return list(_unique_configs(combined))
def collect_ci_tests():
"""Collect all test IDs that would run in CI using --collect-only with JSON output."""
all_test_ids = set()
configs = iter_all_ci_configs()
for config in configs:
print(f"Collecting tests for suite={config['suite']}, setup={config['setup']}...")
# Create a temporary file for JSON report
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json_report_file = f.name
try:
# Configure environment for collection run
env = os.environ.copy()
env["PYTEST_ADDOPTS"] = f"--json-report --json-report-file={json_report_file}"
repo_path = str(REPO_ROOT)
existing_path = env.get("PYTHONPATH", "")
env["PYTHONPATH"] = f"{repo_path}{os.pathsep}{existing_path}" if existing_path else repo_path
result = subprocess.run(
[
"./scripts/integration-tests.sh",
"--collect-only",
"--suite",
config["suite"],
"--setup",
config["setup"],
],
capture_output=True,
text=True,
cwd=REPO_ROOT,
env=env,
)
if result.returncode != 0:
raise RuntimeError(
"Test collection failed.\n"
f"Command: {' '.join(result.args)}\n"
f"stdout:\n{result.stdout}\n"
f"stderr:\n{result.stderr}"
)
# Parse JSON report to extract test IDs
try:
with open(json_report_file) as f:
report = json.load(f)
# The "collectors" field contains collected test items
# Each collector has a "result" array with test node IDs
for collector in report.get("collectors", []):
for item in collector.get("result", []):
# The "nodeid" field is the test ID
if "nodeid" in item:
all_test_ids.add(item["nodeid"])
print(f" Collected {len(all_test_ids)} test IDs so far")
except (json.JSONDecodeError, FileNotFoundError) as e:
print(f" Warning: Failed to parse JSON report: {e}")
continue
finally:
# Clean up temp file
if os.path.exists(json_report_file):
os.unlink(json_report_file)
print(f"\nTotal unique test IDs collected: {len(all_test_ids)}")
return all_test_ids, configs
def get_base_test_id(test_id: str) -> str:
"""Extract base test ID without parameterization.
Example:
'tests/integration/inference/test_foo.py::test_bar[param1-param2]'
-> 'tests/integration/inference/test_foo.py::test_bar'
"""
return test_id.split("[")[0] if "[" in test_id else test_id
def find_all_recordings():
"""Find all recording JSON files."""
return list((REPO_ROOT / "tests/integration").rglob("recordings/*.json"))
def analyze_recordings(ci_test_ids, dry_run=True):
"""Analyze recordings and identify unused ones."""
# Use full test IDs with parameterization for exact matching
all_recordings = find_all_recordings()
print(f"\nTotal recording files: {len(all_recordings)}")
# Categorize recordings
used_recordings = []
unused_recordings = []
shared_recordings = [] # model-list endpoints without test_id
parse_errors = []
for json_file in all_recordings:
try:
with open(json_file) as f:
data = json.load(f)
test_id = data.get("test_id", "")
if not test_id:
# Shared/infrastructure recordings (model lists, etc)
shared_recordings.append(json_file)
continue
# Match exact test_id (with full parameterization)
if test_id in ci_test_ids:
used_recordings.append(json_file)
else:
unused_recordings.append((json_file, test_id))
except Exception as e:
parse_errors.append((json_file, str(e)))
# Print summary
print("\nRecording Analysis:")
print(f" Used in CI: {len(used_recordings)}")
print(f" Shared (no ID): {len(shared_recordings)}")
print(f" UNUSED: {len(unused_recordings)}")
print(f" Parse errors: {len(parse_errors)}")
if unused_recordings:
print("\nUnused recordings by test:")
# Group by base test ID
by_test = defaultdict(list)
for file, test_id in unused_recordings:
base = get_base_test_id(test_id)
by_test[base].append(file)
for base_test, files in sorted(by_test.items()):
print(f"\n {base_test}")
print(f" ({len(files)} recording(s))")
for f in files[:3]:
print(f" - {f.relative_to(REPO_ROOT / 'tests/integration')}")
if len(files) > 3:
print(f" ... and {len(files) - 3} more")
if parse_errors:
print("\nParse errors:")
for file, error in parse_errors[:5]:
print(f" {file.relative_to(REPO_ROOT)}: {error}")
if len(parse_errors) > 5:
print(f" ... and {len(parse_errors) - 5} more")
# Perform cleanup
if not dry_run:
print(f"\nDeleting {len(unused_recordings)} unused recordings...")
for file, _ in unused_recordings:
file.unlink()
print(f" Deleted: {file.relative_to(REPO_ROOT / 'tests/integration')}")
print("✅ Cleanup complete")
else:
print("\n(Dry run - no files deleted)")
print("\nTo delete these files, run with --delete")
return len(unused_recordings)
def main():
parser = argparse.ArgumentParser(
description="Clean up unused test recordings based on CI test collection",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__doc__,
)
parser.add_argument("--delete", action="store_true", help="Actually delete unused recordings (default is dry-run)")
parser.add_argument("--manifest", help="Save collected test IDs to file (optional)")
args = parser.parse_args()
print("=" * 60)
print("Recording Cleanup Utility")
print("=" * 60)
ci_configs = iter_all_ci_configs()
print(f"\nDetected CI configurations: {len(ci_configs)}")
for config in ci_configs:
print(f" - suite={config['suite']}, setup={config['setup']}")
# Collect test IDs from CI configurations
ci_test_ids, _ = collect_ci_tests()
if args.manifest:
with open(args.manifest, "w") as f:
for test_id in sorted(ci_test_ids):
f.write(f"{test_id}\n")
print(f"\nSaved test IDs to: {args.manifest}")
# Analyze and cleanup
unused_count = analyze_recordings(ci_test_ids, dry_run=not args.delete)
print("\n" + "=" * 60)
if unused_count > 0 and not args.delete:
print("Run with --delete to remove unused recordings")
if __name__ == "__main__":
main()

scripts/generate_ci_matrix.py (new executable file, 61 lines)

@@ -0,0 +1,61 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Generate CI test matrix from ci_matrix.json with schedule/input overrides.
This script is used by .github/workflows/integration-tests.yml to generate
the test matrix dynamically based on the CI_MATRIX definition.
"""
import json
from pathlib import Path
CI_MATRIX_FILE = Path(__file__).parent.parent / "tests/integration/ci_matrix.json"
with open(CI_MATRIX_FILE) as f:
matrix_config = json.load(f)
DEFAULT_MATRIX = matrix_config["default"]
SCHEDULE_MATRICES: dict[str, list[dict[str, str]]] = matrix_config.get("schedules", {})
def generate_matrix(schedule="", test_setup=""):
"""
Generate test matrix based on schedule or manual input.
Args:
schedule: GitHub cron schedule string (e.g., "1 0 * * 0" for weekly)
test_setup: Manual test setup input (e.g., "ollama-vision")
Returns:
Matrix configuration as JSON string
"""
# Weekly scheduled test matrices
if schedule and schedule in SCHEDULE_MATRICES:
matrix = SCHEDULE_MATRICES[schedule]
# Manual input for specific setup
elif test_setup == "ollama-vision":
matrix = [{"suite": "vision", "setup": "ollama-vision"}]
# Default: use JSON-defined matrix
else:
matrix = DEFAULT_MATRIX
# GitHub Actions expects {"include": [...]} format
return json.dumps({"include": matrix})
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Generate CI test matrix")
parser.add_argument("--schedule", default="", help="GitHub schedule cron string")
parser.add_argument("--test-setup", default="", help="Manual test setup input")
args = parser.parse_args()
print(generate_matrix(args.schedule, args.test_setup))
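For illustration, calling `generate_matrix` directly (the workflow invokes the script via its CLI instead). The weekly output assumes `ci_matrix.json` maps the `1 0 * * 0` schedule to vllm+base, as the workflow comments above indicate:

```python
from generate_ci_matrix import generate_matrix  # import path is illustrative

# Default matrix (no schedule, no manual setup) comes straight from ci_matrix.json.
print(generate_matrix())

# Weekly schedule -> vllm+base, assuming that mapping in ci_matrix.json:
print(generate_matrix(schedule="1 0 * * 0"))
# {"include": [{"suite": "base", "setup": "vllm"}]}

# Manual ollama-vision input is hardcoded in the function:
print(generate_matrix(test_setup="ollama-vision"))
# {"include": [{"suite": "vision", "setup": "ollama-vision"}]}
```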


@@ -227,14 +227,16 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
echo "=== Starting Llama Stack Server ==="
export LLAMA_STACK_LOG_WIDTH=120
# Configure telemetry collector for server mode
# Use a fixed port for the OTEL collector so the server can connect to it
COLLECTOR_PORT=4317
export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
export OTEL_BSP_SCHEDULE_DELAY="200"
export OTEL_BSP_EXPORT_TIMEOUT="2000"
# Configure telemetry collector for server mode
# Use a fixed port for the OTEL collector so the server can connect to it
COLLECTOR_PORT=4317
export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
# Disabled: https://github.com/llamastack/llama-stack/issues/4089
#export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
export OTEL_BSP_SCHEDULE_DELAY="200"
export OTEL_BSP_EXPORT_TIMEOUT="2000"
export OTEL_METRIC_EXPORT_INTERVAL="200"
# remove "server:" from STACK_CONFIG
stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
@@ -336,7 +338,11 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
DOCKER_ENV_VARS=""
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
# Disabled: https://github.com/llamastack/llama-stack/issues/4089
#DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_METRIC_EXPORT_INTERVAL=200"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_SCHEDULE_DELAY=200"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_BSP_EXPORT_TIMEOUT=2000"
# Pass through API keys if they exist
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
@@ -349,6 +355,10 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
[ -n "${OLLAMA_URL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
[ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
if [[ "$TEST_SETUP" == "vllm" ]]; then
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e VLLM_URL=http://localhost:8000/v1"
fi
# Determine the actual image name (may have localhost/ prefix)
IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
if [[ -z "$IMAGE_NAME" ]]; then
@@ -401,11 +411,6 @@ fi
echo "=== Running Integration Tests ==="
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
# Additional exclusions for vllm setup
if [[ "$TEST_SETUP" == "vllm" ]]; then
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
fi
PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
if [[ -n "$TEST_PATTERN" ]]; then
PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"


@@ -6,26 +6,22 @@
from .conversations import (
Conversation,
ConversationCreateRequest,
ConversationDeletedResource,
ConversationItem,
ConversationItemCreateRequest,
ConversationItemDeletedResource,
ConversationItemList,
Conversations,
ConversationUpdateRequest,
Metadata,
)
__all__ = [
"Conversation",
"ConversationCreateRequest",
"ConversationDeletedResource",
"ConversationItem",
"ConversationItemCreateRequest",
"ConversationItemDeletedResource",
"ConversationItemList",
"Conversations",
"ConversationUpdateRequest",
"Metadata",
]


@@ -102,32 +102,6 @@ register_schema(ConversationItem, name="ConversationItem")
# ]
@json_schema_type
class ConversationCreateRequest(BaseModel):
"""Request body for creating a conversation."""
items: list[ConversationItem] | None = Field(
default=[],
description="Initial items to include in the conversation context. You may add up to 20 items at a time.",
max_length=20,
)
metadata: Metadata | None = Field(
default={},
description="Set of 16 key-value pairs that can be attached to an object. Useful for storing additional information",
max_length=16,
)
@json_schema_type
class ConversationUpdateRequest(BaseModel):
"""Request body for updating a conversation."""
metadata: Metadata = Field(
...,
description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters.",
)
@json_schema_type
class ConversationDeletedResource(BaseModel):
"""Response for deleted conversation."""


@@ -260,7 +260,7 @@ class VectorStoreSearchResponsePage(BaseModel):
"""
object: str = "vector_store.search_results.page"
search_query: str
search_query: list[str]
data: list[VectorStoreSearchResponse]
has_more: bool = False
next_page: str | None = None
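A minimal sketch of the updated field, using only the fields visible in this hunk:

```python
from llama_stack.apis.vector_io import VectorStoreSearchResponsePage  # path per the router import below

# search_query is now a list of strings rather than a single string.
page = VectorStoreSearchResponsePage(
    search_query=["deployment runbook"],  # illustrative query
    data=[],
)
print(page.object)  # "vector_store.search_results.page"
```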
@@ -478,7 +478,7 @@ class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
name: str | None = None
file_ids: list[str] | None = None
expires_after: dict[str, Any] | None = None
chunking_strategy: dict[str, Any] | None = None
chunking_strategy: VectorStoreChunkingStrategy | None = None
metadata: dict[str, Any] | None = None


@@ -46,6 +46,10 @@ class StackListDeps(Subcommand):
def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
# always keep implementation completely silo-ed away from CLI so CLI
# can be fast to load and reduces dependencies
if not args.config and not args.providers:
self.parser.print_help()
self.parser.exit()
from ._list_deps import run_stack_list_deps_command
return run_stack_list_deps_command(args)


@@ -9,48 +9,69 @@ from pathlib import Path
from llama_stack.cli.subcommand import Subcommand
from llama_stack.cli.table import print_table
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
class StackListBuilds(Subcommand):
"""List built stacks in .llama/distributions directory"""
"""List available distributions (both built-in and custom)"""
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"list",
prog="llama stack list",
description="list the build stacks",
description="list available distributions",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._list_stack_command)
def _get_distribution_dirs(self) -> dict[str, Path]:
"""Return a dictionary of distribution names and their paths"""
distributions = {}
dist_dir = Path.home() / ".llama" / "distributions"
def _get_distribution_dirs(self) -> dict[str, tuple[Path, str]]:
"""Return a dictionary of distribution names and their paths with source type
Returns:
dict mapping distro name to (path, source_type) where source_type is 'built-in' or 'custom'
"""
distributions = {}
# Get built-in distributions from source code
distro_dir = Path(__file__).parent.parent.parent / "distributions"
if distro_dir.exists():
for stack_dir in distro_dir.iterdir():
if stack_dir.is_dir() and not stack_dir.name.startswith(".") and not stack_dir.name.startswith("__"):
distributions[stack_dir.name] = (stack_dir, "built-in")
# Get custom/run distributions from ~/.llama/distributions
# These override built-in ones if they have the same name
if DISTRIBS_BASE_DIR.exists():
for stack_dir in DISTRIBS_BASE_DIR.iterdir():
if stack_dir.is_dir() and not stack_dir.name.startswith("."):
# Clean up the name (remove llamastack- prefix if present)
name = stack_dir.name.replace("llamastack-", "")
distributions[name] = (stack_dir, "custom")
if dist_dir.exists():
for stack_dir in dist_dir.iterdir():
if stack_dir.is_dir():
distributions[stack_dir.name] = stack_dir
return distributions
def _list_stack_command(self, args: argparse.Namespace) -> None:
distributions = self._get_distribution_dirs()
if not distributions:
print("No stacks found in ~/.llama/distributions")
print("No distributions found")
return
headers = ["Stack Name", "Path"]
headers.extend(["Build Config", "Run Config"])
headers = ["Stack Name", "Source", "Path", "Build Config", "Run Config"]
rows = []
for name, path in distributions.items():
row = [name, str(path)]
for name, (path, source_type) in sorted(distributions.items()):
row = [name, source_type, str(path)]
# Check for build and run config files
build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
# For built-in distributions, configs are named build.yaml and run.yaml
# For custom distributions, configs are named {name}-build.yaml and {name}-run.yaml
if source_type == "built-in":
build_config = "Yes" if (path / "build.yaml").exists() else "No"
run_config = "Yes" if (path / "run.yaml").exists() else "No"
else:
build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
row.extend([build_config, run_config])
rows.append(row)
print_table(rows, headers, separate_rows=True)


@@ -20,6 +20,8 @@ from llama_stack.apis.vector_io import (
SearchRankingOptions,
VectorIO,
VectorStoreChunkingStrategy,
VectorStoreChunkingStrategyStatic,
VectorStoreChunkingStrategyStaticConfig,
VectorStoreDeleteResponse,
VectorStoreFileBatchObject,
VectorStoreFileContentsResponse,
@@ -167,6 +169,13 @@ class VectorIORouter(VectorIO):
if embedding_dimension is not None:
params.model_extra["embedding_dimension"] = embedding_dimension
# Set chunking strategy explicitly if not provided
if params.chunking_strategy is None or params.chunking_strategy.type == "auto":
# actualize the chunking strategy to static
params.chunking_strategy = VectorStoreChunkingStrategyStatic(
static=VectorStoreChunkingStrategyStaticConfig()
)
return await provider.openai_create_vector_store(params)
async def openai_list_vector_stores(
@@ -283,6 +292,8 @@ class VectorIORouter(VectorIO):
chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}")
if chunking_strategy is None or chunking_strategy.type == "auto":
chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
provider = await self.routing_table.get_provider_impl(vector_store_id)
return await provider.openai_attach_file_to_vector_store(
vector_store_id=vector_store_id,


@@ -427,6 +427,7 @@ _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
"counters": {},
"gauges": {},
"up_down_counters": {},
"histograms": {},
}
_global_lock = threading.Lock()
_TRACER_PROVIDER = None
@@ -540,6 +541,16 @@ class Telemetry:
)
return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name])
def _get_or_create_histogram(self, name: str, unit: str) -> metrics.Histogram:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["histograms"]:
_GLOBAL_STORAGE["histograms"][name] = self.meter.create_histogram(
name=name,
unit=unit,
description=f"Histogram for {name}",
)
return cast(metrics.Histogram, _GLOBAL_STORAGE["histograms"][name])
def _log_metric(self, event: MetricEvent) -> None:
# Add metric as an event to the current span
try:
@@ -571,7 +582,16 @@ class Telemetry:
# Log to OpenTelemetry meter if available
if self.meter is None:
return
if isinstance(event.value, int):
# Use histograms for token-related metrics (per-request measurements)
# Use counters for other cumulative metrics
token_metrics = {"prompt_tokens", "completion_tokens", "total_tokens"}
if event.metric in token_metrics:
# Token metrics are per-request measurements, use histogram
histogram = self._get_or_create_histogram(event.metric, event.unit)
histogram.record(event.value, attributes=_clean_attributes(event.attributes))
elif isinstance(event.value, int):
counter = self._get_or_create_counter(event.metric, event.unit)
counter.add(event.value, attributes=_clean_attributes(event.attributes))
elif isinstance(event.value, float):


@@ -52,7 +52,17 @@ def resolve_config_or_distro(
logger.debug(f"Using distribution: {distro_config}")
return distro_config
# Strategy 3: Try as built distribution name
# Strategy 3: Try as a distro config reference of the form <distro>::<config>
# eg: starter::run-with-postgres-store.yaml
# The :: separator avoids confusion with a filesystem path
if "::" in config_or_distro:
distro_name, config_name = config_or_distro.split("::")
distro_config = _get_distro_config_path(distro_name, config_name)
if distro_config.exists():
logger.info(f"Using distribution: {distro_config}")
return distro_config
# Strategy 4: Try as built distribution name
distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
if distrib_config.exists():
logger.debug(f"Using built distribution: {distrib_config}")
@@ -63,13 +73,15 @@ def resolve_config_or_distro(
logger.debug(f"Using built distribution: {distrib_config}")
return distrib_config
# Strategy 4: Failed - provide helpful error
# Strategy 5: Failed - provide helpful error
raise ValueError(_format_resolution_error(config_or_distro, mode))
def _get_distro_config_path(distro_name: str, mode: Mode) -> Path:
def _get_distro_config_path(distro_name: str, mode: str) -> Path:
"""Get the config file path for a distro."""
return DISTRO_DIR / distro_name / f"{mode}.yaml"
if not mode.endswith(".yaml"):
mode = f"{mode}.yaml"
return DISTRO_DIR / distro_name / mode
def _format_resolution_error(config_or_distro: str, mode: Mode) -> str:


@@ -84,6 +84,15 @@ def run_command(command: list[str]) -> int:
text=True,
check=False,
)
# Print stdout and stderr if command failed
if result.returncode != 0:
log.error(f"Command {' '.join(command)} failed with returncode {result.returncode}")
if result.stdout:
log.error(f"STDOUT: {result.stdout}")
if result.stderr:
log.error(f"STDERR: {result.stderr}")
return result.returncode
except subprocess.SubprocessError as e:
log.error(f"Subprocess error: {e}")


@@ -56,4 +56,5 @@ image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]


@@ -13,5 +13,6 @@ from ..starter.starter import get_distribution_template as get_starter_distribut
def get_distribution_template() -> DistributionTemplate:
template = get_starter_distribution_template(name="ci-tests")
template.description = "CI tests for Llama Stack"
template.run_configs.pop("run-with-postgres-store.yaml", None)
return template


@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .postgres_demo import get_distribution_template # noqa: F401


@@ -1,23 +0,0 @@
version: 2
distribution_spec:
description: Quick start template for running Llama Stack with several popular providers
providers:
inference:
- provider_type: remote::vllm
- provider_type: inline::sentence-transformers
vector_io:
- provider_type: remote::chromadb
safety:
- provider_type: inline::llama-guard
agents:
- provider_type: inline::meta-reference
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol
image_type: venv
additional_pip_packages:
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]


@@ -1,125 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.models import ModelType
from llama_stack.core.datatypes import (
BuildProvider,
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.distributions.template import (
DistributionTemplate,
RunConfigSettings,
)
from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
def get_distribution_template() -> DistributionTemplate:
inference_providers = [
Provider(
provider_id="vllm-inference",
provider_type="remote::vllm",
config=VLLMInferenceAdapterConfig.sample_run_config(
url="${env.VLLM_URL:=http://localhost:8000/v1}",
),
),
]
providers = {
"inference": [
BuildProvider(provider_type="remote::vllm"),
BuildProvider(provider_type="inline::sentence-transformers"),
],
"vector_io": [BuildProvider(provider_type="remote::chromadb")],
"safety": [BuildProvider(provider_type="inline::llama-guard")],
"agents": [BuildProvider(provider_type="inline::meta-reference")],
"tool_runtime": [
BuildProvider(provider_type="remote::brave-search"),
BuildProvider(provider_type="remote::tavily-search"),
BuildProvider(provider_type="inline::rag-runtime"),
BuildProvider(provider_type="remote::model-context-protocol"),
],
}
name = "postgres-demo"
vector_io_providers = [
Provider(
provider_id="${env.ENABLE_CHROMADB:+chromadb}",
provider_type="remote::chromadb",
config=ChromaVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
url="${env.CHROMADB_URL:=}",
),
),
]
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::rag",
provider_id="rag-runtime",
),
]
default_models = [
ModelInput(
model_id="${env.INFERENCE_MODEL}",
provider_id="vllm-inference",
)
]
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
embedding_model = ModelInput(
model_id="nomic-embed-text-v1.5",
provider_id=embedding_provider.provider_id,
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 768,
},
)
return DistributionTemplate(
name=name,
distro_type="self_hosted",
description="Quick start template for running Llama Stack with several popular providers",
container_image=None,
template_path=None,
providers=providers,
available_models_by_provider={},
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers,
},
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
storage_backends={
"kv_default": PostgresKVStoreConfig.sample_run_config(
table_name="llamastack_kvstore",
),
"sql_default": PostgresSqlStoreConfig.sample_run_config(),
},
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (
"8321",
"Port for the Llama Stack distribution server",
),
},
)


@@ -57,4 +57,5 @@ image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]


@@ -0,0 +1,281 @@
version: 2
image_name: starter-gpu
apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras
config:
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
- provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi
config:
url: ${env.TGI_URL:=}
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock
provider_type: remote::bedrock
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:=}
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
- provider_id: anthropic
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY:=}
- provider_id: gemini
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
provider_type: remote::vertexai
config:
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure
config:
api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
- provider_id: code-scanner
provider_type: inline::code-scanner
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
post_training:
- provider_id: huggingface-gpu
provider_type: inline::huggingface-gpu
config:
checkpoint_format: huggingface
distributed_backend: null
device: cpu
dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
batches:
- provider_id: reference
provider_type: inline::reference
config:
kvstore:
namespace: batches
backend: kv_postgres
storage:
backends:
kv_postgres:
type: kv_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
sql_postgres:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
stores:
metadata:
namespace: registry
backend: kv_postgres
inference:
table_name: inference_store
backend: sql_postgres
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_postgres
prompts:
namespace: prompts
backend: kv_postgres
registered_resources:
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups: []
server:
port: 8321
telemetry:
enabled: true

View file

@@ -57,4 +57,5 @@ image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]

View file

@@ -0,0 +1,278 @@
version: 2
image_name: starter
apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras
config:
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
- provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi
config:
url: ${env.TGI_URL:=}
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock
provider_type: remote::bedrock
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:=}
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
- provider_id: anthropic
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY:=}
- provider_id: gemini
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
provider_type: remote::vertexai
config:
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure
config:
api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
- provider_id: code-scanner
provider_type: inline::code-scanner
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
post_training:
- provider_id: torchtune-cpu
provider_type: inline::torchtune-cpu
config:
checkpoint_format: meta
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
batches:
- provider_id: reference
provider_type: inline::reference
config:
kvstore:
namespace: batches
backend: kv_postgres
storage:
backends:
kv_postgres:
type: kv_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
sql_postgres:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
stores:
metadata:
namespace: registry
backend: kv_postgres
inference:
table_name: inference_store
backend: sql_postgres
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_postgres
prompts:
namespace: prompts
backend: kv_postgres
registered_resources:
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups: []
server:
port: 8321
telemetry:
enabled: true

View file

@@ -17,6 +17,11 @@ from llama_stack.core.datatypes import (
ToolGroupInput,
VectorStoresConfig,
)
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
SqlStoreReference,
)
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
from llama_stack.providers.datatypes import RemoteProviderSpec
@@ -36,6 +41,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
)
from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
@@ -181,6 +187,62 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
),
]
postgres_config = PostgresSqlStoreConfig.sample_run_config()
default_overrides = {
"inference": remote_inference_providers + [embedding_provider],
"vector_io": [
Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="sqlite-vec",
provider_type="inline::sqlite-vec",
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.MILVUS_URL:+milvus}",
provider_type="inline::milvus",
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.CHROMADB_URL:+chromadb}",
provider_type="remote::chromadb",
config=ChromaVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}/",
url="${env.CHROMADB_URL:=}",
),
),
Provider(
provider_id="${env.PGVECTOR_DB:+pgvector}",
provider_type="remote::pgvector",
config=PGVectorVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
db="${env.PGVECTOR_DB:=}",
user="${env.PGVECTOR_USER:=}",
password="${env.PGVECTOR_PASSWORD:=}",
),
),
Provider(
provider_id="${env.QDRANT_URL:+qdrant}",
provider_type="remote::qdrant",
config=QdrantVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
url="${env.QDRANT_URL:=}",
),
),
Provider(
provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
provider_type="remote::weaviate",
config=WeaviateVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
),
),
],
"files": [files_provider],
}
return DistributionTemplate(
name=name,
@@ -189,64 +251,10 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
container_image=None,
template_path=None,
providers=providers,
additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())),
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": remote_inference_providers + [embedding_provider],
"vector_io": [
Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="sqlite-vec",
provider_type="inline::sqlite-vec",
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.MILVUS_URL:+milvus}",
provider_type="inline::milvus",
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
),
Provider(
provider_id="${env.CHROMADB_URL:+chromadb}",
provider_type="remote::chromadb",
config=ChromaVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}/",
url="${env.CHROMADB_URL:=}",
),
),
Provider(
provider_id="${env.PGVECTOR_DB:+pgvector}",
provider_type="remote::pgvector",
config=PGVectorVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
db="${env.PGVECTOR_DB:=}",
user="${env.PGVECTOR_USER:=}",
password="${env.PGVECTOR_PASSWORD:=}",
),
),
Provider(
provider_id="${env.QDRANT_URL:+qdrant}",
provider_type="remote::qdrant",
config=QdrantVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
url="${env.QDRANT_URL:=}",
),
),
Provider(
provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
provider_type="remote::weaviate",
config=WeaviateVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
),
),
],
"files": [files_provider],
},
provider_overrides=default_overrides,
default_models=[],
default_tool_groups=default_tool_groups,
default_shields=default_shields,
@@ -261,6 +269,55 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
default_shield_id="llama-guard",
),
),
"run-with-postgres-store.yaml": RunConfigSettings(
provider_overrides={
**default_overrides,
"agents": [
Provider(
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
persistence_store=postgres_config,
responses_store=postgres_config,
),
)
],
"batches": [
Provider(
provider_id="reference",
provider_type="inline::reference",
config=dict(
kvstore=KVStoreReference(
backend="kv_postgres",
namespace="batches",
).model_dump(exclude_none=True),
),
)
],
},
storage_backends={
"kv_postgres": PostgresKVStoreConfig.sample_run_config(),
"sql_postgres": postgres_config,
},
storage_stores={
"metadata": KVStoreReference(
backend="kv_postgres",
namespace="registry",
).model_dump(exclude_none=True),
"inference": InferenceStoreReference(
backend="sql_postgres",
table_name="inference_store",
).model_dump(exclude_none=True),
"conversations": SqlStoreReference(
backend="sql_postgres",
table_name="openai_conversations",
).model_dump(exclude_none=True),
"prompts": KVStoreReference(
backend="kv_postgres",
namespace="prompts",
).model_dump(exclude_none=True),
},
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (

View file

@@ -10,8 +10,8 @@ from .config import PassthroughImplConfig
class PassthroughProviderDataValidator(BaseModel):
url: str
api_key: str
passthrough_url: str
passthrough_api_key: str
async def get_adapter_impl(config: PassthroughImplConfig, _deps):

View file

@@ -6,7 +6,7 @@
from typing import Any
from pydantic import Field, SecretStr
from pydantic import Field
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack.schema_utils import json_schema_type
@@ -19,11 +19,6 @@ class PassthroughImplConfig(RemoteInferenceProviderConfig):
description="The URL for the passthrough endpoint",
)
api_key: SecretStr | None = Field(
default=None,
description="API Key for the passthrouth endpoint",
)
@classmethod
def sample_run_config(
cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs

View file

@@ -5,9 +5,8 @@
# the root directory of this source tree.
from collections.abc import AsyncIterator
from typing import Any
from llama_stack_client import AsyncLlamaStackClient
from openai import AsyncOpenAI
from llama_stack.apis.inference import (
Inference,
@@ -20,103 +19,117 @@ from llama_stack.apis.inference import (
OpenAIEmbeddingsResponse,
)
from llama_stack.apis.models import Model
from llama_stack.core.library_client import convert_pydantic_to_json_value
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from llama_stack.core.request_headers import NeedsRequestProviderData
from .config import PassthroughImplConfig
class PassthroughInferenceAdapter(Inference):
class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference):
def __init__(self, config: PassthroughImplConfig) -> None:
ModelRegistryHelper.__init__(self)
self.config = config
async def initialize(self) -> None:
pass
async def shutdown(self) -> None:
pass
async def unregister_model(self, model_id: str) -> None:
pass
async def register_model(self, model: Model) -> Model:
return model
def _get_client(self) -> AsyncLlamaStackClient:
passthrough_url = None
passthrough_api_key = None
provider_data = None
async def list_models(self) -> list[Model]:
"""List models by calling the downstream /v1/models endpoint."""
client = self._get_openai_client()
if self.config.url is not None:
passthrough_url = self.config.url
else:
provider_data = self.get_request_provider_data()
if provider_data is None or not provider_data.passthrough_url:
raise ValueError(
'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
)
passthrough_url = provider_data.passthrough_url
response = await client.models.list()
if self.config.api_key is not None:
passthrough_api_key = self.config.api_key.get_secret_value()
else:
provider_data = self.get_request_provider_data()
if provider_data is None or not provider_data.passthrough_api_key:
raise ValueError(
'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
)
passthrough_api_key = provider_data.passthrough_api_key
# Convert from OpenAI format to Llama Stack Model format
models = []
for model_data in response.data:
downstream_model_id = model_data.id
custom_metadata = getattr(model_data, "custom_metadata", {}) or {}
return AsyncLlamaStackClient(
base_url=passthrough_url,
api_key=passthrough_api_key,
provider_data=provider_data,
# Prefix identifier with provider ID for local registry
local_identifier = f"{self.__provider_id__}/{downstream_model_id}"
model = Model(
identifier=local_identifier,
provider_id=self.__provider_id__,
provider_resource_id=downstream_model_id,
model_type=custom_metadata.get("model_type", "llm"),
metadata=custom_metadata,
)
models.append(model)
return models
async def should_refresh_models(self) -> bool:
"""Passthrough should refresh models since they come from downstream dynamically."""
return self.config.refresh_models
def _get_openai_client(self) -> AsyncOpenAI:
"""Get an AsyncOpenAI client configured for the downstream server."""
base_url = self._get_passthrough_url()
api_key = self._get_passthrough_api_key()
return AsyncOpenAI(
base_url=f"{base_url.rstrip('/')}/v1",
api_key=api_key,
)
async def openai_embeddings(
self,
params: OpenAIEmbeddingsRequestWithExtraBody,
) -> OpenAIEmbeddingsResponse:
raise NotImplementedError()
def _get_passthrough_url(self) -> str:
"""Get the passthrough URL from config or provider data."""
if self.config.url is not None:
return self.config.url
provider_data = self.get_request_provider_data()
if provider_data is None:
raise ValueError(
'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
)
return provider_data.passthrough_url
def _get_passthrough_api_key(self) -> str:
"""Get the passthrough API key from config or provider data."""
if self.config.auth_credential is not None:
return self.config.auth_credential.get_secret_value()
provider_data = self.get_request_provider_data()
if provider_data is None:
raise ValueError(
'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
)
return provider_data.passthrough_api_key
async def openai_completion(
self,
params: OpenAICompletionRequestWithExtraBody,
) -> OpenAICompletion:
client = self._get_client()
model_obj = await self.model_store.get_model(params.model)
params = params.model_copy()
params.model = model_obj.provider_resource_id
"""Forward completion request to downstream using OpenAI client."""
client = self._get_openai_client()
request_params = params.model_dump(exclude_none=True)
return await client.inference.openai_completion(**request_params)
response = await client.completions.create(**request_params)
return response # type: ignore
async def openai_chat_completion(
self,
params: OpenAIChatCompletionRequestWithExtraBody,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
client = self._get_client()
model_obj = await self.model_store.get_model(params.model)
params = params.model_copy()
params.model = model_obj.provider_resource_id
"""Forward chat completion request to downstream using OpenAI client."""
client = self._get_openai_client()
request_params = params.model_dump(exclude_none=True)
response = await client.chat.completions.create(**request_params)
return response # type: ignore
return await client.inference.openai_chat_completion(**request_params)
def cast_value_to_json_dict(self, request_params: dict[str, Any]) -> dict[str, Any]:
json_params = {}
for key, value in request_params.items():
json_input = convert_pydantic_to_json_value(value)
if isinstance(json_input, dict):
json_input = {k: v for k, v in json_input.items() if v is not None}
elif isinstance(json_input, list):
json_input = [x for x in json_input if x is not None]
new_input = []
for x in json_input:
if isinstance(x, dict):
x = {k: v for k, v in x.items() if v is not None}
new_input.append(x)
json_input = new_input
json_params[key] = json_input
return json_params
async def openai_embeddings(
self,
params: OpenAIEmbeddingsRequestWithExtraBody,
) -> OpenAIEmbeddingsResponse:
"""Forward embeddings request to downstream using OpenAI client."""
client = self._get_openai_client()
request_params = params.model_dump(exclude_none=True)
response = await client.embeddings.create(**request_params)
return response # type: ignore
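
A minimal sketch of exercising the rewritten adapter with per-request provider data, assuming a Llama Stack server on localhost:8321 (the default port in the run configs above). When the static config leaves the URL and credential unset, the adapter resolves both from the X-LlamaStack-Provider-Data header, whose keys match the validator above; the endpoint path and model name here are illustrative assumptions, not confirmed by this diff:

import json
import requests  # any HTTP client works; requests is used for brevity

# Per-request provider data, matching the keys the adapter reads when the
# static config leaves url/auth_credential unset.
provider_data = {
    "passthrough_url": "http://downstream-stack:8321",  # downstream server (assumption)
    "passthrough_api_key": "example-key",               # illustrative credential
}

resp = requests.post(
    "http://localhost:8321/v1/chat/completions",        # path is an assumption
    headers={"X-LlamaStack-Provider-Data": json.dumps(provider_data)},
    json={
        "model": "passthrough/some-model",              # hypothetical prefixed identifier
        "messages": [{"role": "user", "content": "hello"}],
    },
)
print(resp.json())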

View file

@@ -283,8 +283,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
# ...
provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
if "embedding" in functions:
embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
context_length = model_spec["model_limits"]["max_sequence_length"]
embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0)
context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0)
embedding_metadata = {
"embedding_dimension": embedding_dimension,
"context_length": context_length,
@@ -306,10 +306,6 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
metadata={},
model_type=ModelType.llm,
)
# In theory, I guess it is possible that a model could be both an embedding model and a text chat model.
# In that case, the cache will record the generator Model object, and the list which we return will have
# both the generator Model object and the text chat Model object. That's fine because the cache is
# only used for check_model_availability() anyway.
self._model_cache[provider_resource_id] = model
models.append(model)
return models

View file

@@ -26,6 +26,7 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategy,
VectorStoreChunkingStrategyAuto,
VectorStoreChunkingStrategyStatic,
VectorStoreChunkingStrategyStaticConfig,
VectorStoreContent,
VectorStoreDeleteResponse,
VectorStoreFileBatchObject,
@@ -414,6 +415,10 @@ class OpenAIVectorStoreMixin(ABC):
in_progress=0,
total=0,
)
if not params.chunking_strategy or params.chunking_strategy.type == "auto":
chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
else:
chunking_strategy = params.chunking_strategy
store_info: dict[str, Any] = {
"id": vector_store_id,
"object": "vector_store",
@@ -426,7 +431,7 @@ class OpenAIVectorStoreMixin(ABC):
"expires_at": None,
"last_active_at": created_at,
"file_ids": [],
"chunking_strategy": params.chunking_strategy,
"chunking_strategy": chunking_strategy.model_dump(),
}
# Add provider information to metadata if provided
@@ -637,7 +642,7 @@ class OpenAIVectorStoreMixin(ABC):
break
return VectorStoreSearchResponsePage(
search_query=search_query,
search_query=query if isinstance(query, list) else [query],
data=data,
has_more=False, # For simplicity, we don't implement pagination here
next_page=None,
@@ -647,7 +652,7 @@ class OpenAIVectorStoreMixin(ABC):
logger.error(f"Error searching vector store {vector_store_id}: {e}")
# Return empty results on error
return VectorStoreSearchResponsePage(
search_query=search_query,
search_query=query if isinstance(query, list) else [query],
data=[],
has_more=False,
next_page=None,
@@ -886,8 +891,8 @@ class OpenAIVectorStoreMixin(ABC):
# Determine pagination info
has_more = len(file_objects) > limit
first_id = file_objects[0].id if file_objects else None
last_id = file_objects[-1].id if file_objects else None
first_id = limited_files[0].id if file_objects else None
last_id = limited_files[-1].id if file_objects else None
return VectorStoreListFilesResponse(
data=limited_files,
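
Worked example of the pagination fix in the last hunk: with limit=2 and three matching files, limited_files = file_objects[:2], so taking last_id from file_objects would point the cursor at a file that was never returned and the next page would skip it. A runnable sketch under those assumptions (names mirror the hunk; the data is hypothetical):

from types import SimpleNamespace

file_objects = [SimpleNamespace(id=f"file-{i}") for i in range(3)]  # hypothetical page
limit = 2
limited_files = file_objects[:limit]

has_more = len(file_objects) > limit                        # True: a third file exists
first_id = limited_files[0].id if limited_files else None   # "file-0"
last_id = limited_files[-1].id if limited_files else None   # "file-1", the correct cursor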

View file

@@ -1,422 +0,0 @@
{
"test_id": "tests/integration/batches/test_batches.py::TestBatchesIntegration::test_batch_e2e_embeddings[emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Hello world",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.034477483,
0.030899182,
0.0066526434,
0.026075281,
-0.039411988,
-0.16037956,
0.06692074,
-0.006511468,
-0.047467157,
0.014774274,
0.07094562,
0.055527706,
0.019183245,
-0.026297163,
-0.010018651,
-0.02694715,
0.0223884,
-0.02220693,
-0.14977267,
-0.017530814,
0.0075938613,
0.054253556,
0.0032258728,
0.031724673,
-0.08466085,
-0.029342307,
0.05155048,
0.048105717,
-0.0032670307,
-0.05822795,
0.041971523,
0.022229431,
0.1281518,
-0.022270948,
-0.011725874,
0.06294936,
-0.032847952,
-0.09124354,
-0.031128692,
0.05274829,
0.047067728,
-0.08414196,
-0.029979317,
-0.020692566,
0.00949804,
-0.0035992558,
0.0074442336,
0.03928378,
0.09326073,
-0.0037437282,
-0.052663893,
-0.058101393,
-0.006925679,
0.0052269334,
0.08290669,
0.019312402,
0.0062818974,
-0.010331665,
0.008930684,
-0.037712026,
-0.045175705,
0.023950849,
-0.006926045,
0.013429504,
0.100098,
-0.0715888,
-0.021700105,
0.031693522,
-0.05161389,
-0.08224763,
-0.06577986,
-0.009853981,
0.005808086,
0.07364217,
-0.034008067,
0.024907362,
0.014441484,
0.02645124,
0.009659713,
0.030284341,
0.052878983,
-0.07536944,
0.009890014,
0.029907802,
0.017498897,
0.02313779,
0.0018918256,
0.0013156217,
-0.047173936,
-0.011251131,
-0.11422648,
-0.019960148,
0.040278148,
0.0022633963,
-0.07986738,
-0.025357265,
0.094500035,
-0.029062947,
-0.14495483,
0.2309815,
0.027703581,
0.03208736,
0.031073036,
0.042917974,
0.064246915,
0.032118786,
-0.004844535,
0.055775862,
-0.03756279,
-0.021487191,
-0.028432492,
-0.028887685,
0.03842892,
-0.017359573,
0.052465834,
-0.07493626,
-0.031175744,
0.021936033,
-0.039823197,
-0.008681939,
0.026978256,
-0.048551314,
0.011414809,
0.029628372,
-0.020587107,
0.013077965,
0.028824588,
-3.1978743e-33,
0.06475607,
-0.018065408,
0.05190019,
0.12193858,
0.028755108,
0.008794777,
-0.07044016,
-0.016856866,
0.040675826,
0.04222898,
0.025450956,
0.035772353,
-0.049134083,
0.0021395232,
-0.015527445,
0.05065655,
-0.04814189,
0.03586998,
-0.004134139,
0.10165314,
-0.055980552,
-0.010677752,
0.011231545,
0.09068785,
0.004311188,
0.035094332,
-0.009658399,
-0.09383056,
0.092755266,
0.00799794,
-0.0077075018,
-0.052119244,
-0.01259255,
0.0032277475,
0.005989667,
0.0075889886,
0.010571857,
-0.08629758,
-0.06985891,
-0.002511263,
-0.091053724,
0.0468712,
0.05203361,
0.0072902967,
0.010906411,
-0.0052922186,
0.013883815,
0.021929385,
0.0341257,
0.060227357,
0.00018942523,
0.0146624865,
-0.07000342,
0.028425341,
-0.027542787,
0.01082086,
0.03491755,
-0.022430921,
0.0096813915,
0.07725412,
0.021618832,
0.114911504,
-0.06805403,
0.023872944,
-0.015999107,
-0.017794114,
0.06442477,
0.03206309,
0.050293576,
-0.005988605,
-0.03376946,
0.017821673,
0.016567992,
0.063335925,
0.034753703,
0.046586752,
0.09789875,
-0.006560692,
0.025039855,
-0.07780643,
0.016878096,
-0.0010056288,
0.02257608,
-0.0382721,
0.09572481,
-0.005296001,
0.010567662,
-0.11538674,
-0.013233586,
-0.010786205,
-0.083147496,
0.073254965,
0.049377624,
-0.009025328,
-0.0957893,
3.3687185e-33,
0.12494067,
0.019226579,
-0.058172084,
-0.035952393,
-0.050862074,
-0.045700952,
-0.0826631,
0.14819908,
-0.088347495,
0.060315337,
0.05109269,
0.010308115,
0.1411753,
0.030833788,
0.06101746,
-0.052806143,
0.13661332,
0.00917483,
-0.017295862,
-0.0128495265,
-0.007851698,
-0.051084496,
-0.05235087,
0.0076632234,
-0.015217299,
0.017015414,
0.021324545,
0.020506723,
-0.12004153,
0.014523494,
0.026743378,
0.025221687,
-0.04270567,
0.00676352,
-0.014453511,
0.045142446,
-0.091383636,
-0.019459482,
-0.017806036,
-0.055010412,
-0.05270923,
-0.010370778,
-0.052053526,
0.020918628,
-0.080037735,
-0.012147244,
-0.057777684,
0.023249507,
-0.007838778,
-0.025807643,
-0.07987164,
-0.020683115,
0.04888083,
-0.020459235,
-0.049192864,
0.01407799,
-0.063744746,
-0.0077936463,
0.016429903,
-0.025707569,
0.013326097,
0.026210392,
0.009855086,
0.06317218,
0.0026150644,
-0.0065879063,
0.0166049,
0.032400407,
0.038005095,
-0.036269873,
-0.0069020875,
0.00019545198,
-0.0017537851,
-0.027427403,
-0.02801922,
0.049696837,
-0.028842367,
-0.0023814398,
0.01481421,
0.00976869,
0.0057697925,
0.01341087,
0.00551593,
0.037237898,
0.007291808,
0.040068958,
0.08141818,
0.07197348,
-0.013163506,
-0.042782705,
-0.010938265,
0.0049547236,
-0.00923014,
0.035068717,
-0.051007,
-1.5708556e-08,
-0.088558294,
0.02391312,
-0.016132735,
0.03169382,
0.027184812,
0.052484553,
-0.047118798,
-0.058789898,
-0.063239954,
0.040775288,
0.049807984,
0.106462926,
-0.07448737,
-0.012401869,
0.018361589,
0.039486438,
-0.024830224,
0.014500051,
-0.03712332,
0.020043189,
8.399218e-05,
0.009852795,
0.024823224,
-0.05252818,
0.02932855,
-0.0871494,
-0.01447227,
0.025996566,
-0.018731978,
-0.07618361,
0.03505914,
0.10363578,
-0.0280213,
0.012769872,
-0.076482065,
-0.018743375,
0.024961015,
0.08152011,
0.06866303,
-0.06411612,
-0.08387694,
0.061479986,
-0.03345598,
-0.10615398,
-0.040166635,
0.032536518,
0.076652974,
-0.07297006,
0.00039833272,
-0.0409393,
-0.07580284,
0.027465926,
0.07468789,
0.017779494,
0.09106629,
0.11033428,
0.00065298256,
0.051472265,
-0.01461242,
0.033237122,
0.023671487,
-0.022980422,
0.038988944,
0.030206418
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 2,
"total_tokens": 2
}
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,13 @@
{
"default": [
{"suite": "base", "setup": "ollama"},
{"suite": "vision", "setup": "ollama-vision"},
{"suite": "responses", "setup": "gpt"},
{"suite": "base-vllm-subset", "setup": "vllm"}
],
"schedules": {
"1 0 * * 0": [
{"suite": "base", "setup": "vllm"}
]
}
}
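
This file drives matrix selection: the default list of (suite, setup) pairs applies when no schedule override matches, while a cron string under schedules swaps in an override (here, the vllm setup against the base suite on the weekly cron). A minimal sketch of how a matrix generator might consume it, assuming the file is plain JSON; the file path and CLI shape are illustrative, and the real script may differ:

import json
import sys

def select_matrix(config: dict, schedule: str | None) -> list[dict]:
    # A matching cron override wins; otherwise fall back to the default pairs.
    if schedule and schedule in config.get("schedules", {}):
        return config["schedules"][schedule]
    return config["default"]

if __name__ == "__main__":
    with open("tests/integration/ci_matrix.json") as f:  # path is an assumption
        config = json.load(f)
    schedule = sys.argv[1] if len(sys.argv) > 1 else None
    # Emit a single JSON line for the workflow to consume as its matrix.
    print(json.dumps({"config": select_matrix(config, schedule)}))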

View file

@@ -0,0 +1,89 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b-instruct-fp16",
"created": 1760453641,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "qwen3:4b",
"created": 1757615302,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-oss:latest",
"created": 1756395223,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "nomic-embed-text:latest",
"created": 1756318548,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b",
"created": 1755191039,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:l6-v2",
"created": 1753968177,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:1b",
"created": 1746124735,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:latest",
"created": 1746044170,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,45 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374291,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-f70298e4ea3e4b4eb7f2cc2deb7a2b01",
"object": "model_permission",
"created": 1762374291,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:06.037921-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,422 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Test user parameter",
"encoding_format": "float",
"user": "test-user-123"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.043779343,
0.021533398,
-0.081306435,
0.010584965,
-0.079082854,
-0.03219143,
0.13092613,
0.04234389,
-0.11600539,
-0.07588513,
0.04182356,
-0.08061255,
0.038127176,
-0.010701234,
0.015768763,
-0.04193689,
0.04310592,
-0.033361685,
0.013566423,
-0.010392366,
0.015551022,
-0.037858423,
-0.050305344,
-0.025666261,
-0.047879875,
-0.087179765,
0.016856788,
-0.036765736,
0.006393739,
0.020844297,
0.11262393,
-0.002143682,
-0.07910913,
0.038748607,
0.11532516,
-0.019759571,
0.0066967797,
-0.021164352,
-0.014471563,
-0.0027048697,
-0.034388524,
-0.052571636,
-0.030607725,
0.04747725,
-0.02431059,
0.0109337615,
-0.03946421,
0.071846664,
-0.020690937,
0.01898796,
0.042931512,
-0.0077551426,
0.0025911122,
-0.058268107,
0.0117475465,
-0.022701943,
0.0017815019,
-0.012612941,
0.030724185,
0.017728312,
-0.06155491,
-0.03656162,
0.02583153,
0.02537894,
0.012139213,
0.009105951,
-0.027318193,
-0.093389414,
0.005184693,
0.007488449,
-0.07540277,
0.010159999,
-0.028444426,
0.030260745,
0.0036438918,
-0.022627153,
-0.037846327,
-0.08381657,
-0.012445195,
-0.048908208,
0.029149827,
-0.044437535,
-0.07520237,
-0.020924438,
0.06342514,
0.1629199,
0.060563333,
-0.012817673,
-0.031030292,
0.018368995,
0.11223112,
0.07292473,
-0.062686674,
-0.031803295,
-0.017489262,
0.048433464,
-0.041148387,
-0.04183779,
-0.05994369,
0.15909556,
-0.027785666,
-0.012455991,
0.056005318,
-0.019891974,
0.022063067,
0.006342065,
0.0464118,
-0.07311654,
0.033282198,
0.05949105,
-0.033307947,
0.030738499,
0.008186239,
-0.020268966,
0.056593496,
-0.081526734,
0.023390312,
0.0060836566,
-0.07992586,
0.013986445,
0.052250065,
0.027186505,
-0.049284942,
0.028148174,
0.019493744,
0.05418436,
0.0827222,
-1.8825437e-33,
0.01360945,
-0.010870715,
0.015887791,
0.069373555,
-0.051129147,
0.08999179,
0.044494778,
0.08100757,
0.018944906,
-0.020974122,
-0.017938385,
-0.021756735,
0.010972489,
0.015099965,
0.017018452,
0.094338946,
0.0034407445,
0.010244923,
-0.044709302,
0.0018059182,
0.015817573,
-0.065777056,
-0.004948138,
0.0044092103,
-0.019589791,
-0.092789896,
-0.025898295,
0.044104066,
0.0541385,
-0.007362511,
-0.021487307,
-0.036836285,
-0.09148704,
0.084001675,
-0.018094191,
0.003797567,
0.020257449,
0.04394643,
-0.0772898,
0.0057312953,
-0.054519102,
-0.024835315,
0.0753162,
0.034552757,
-0.081203006,
-0.12210961,
-0.0053012627,
0.00780717,
0.050265096,
0.015569535,
-0.056362487,
0.039800324,
0.013022089,
-0.04015537,
0.014401654,
-0.033209093,
-0.008451782,
-0.037590392,
-0.01965779,
0.01730637,
-0.00896531,
-0.0018413392,
-0.0030382746,
0.030460354,
-0.05112036,
-0.086875,
-0.018338922,
-0.11328767,
0.07325826,
0.046035297,
0.012633494,
-0.06343216,
-0.028439038,
0.020128354,
-0.07883383,
-0.00069870794,
-0.03155447,
0.12306934,
0.004300722,
-0.026421167,
0.078361824,
-0.077461444,
-0.021267027,
0.048929654,
0.02919381,
-0.0092880055,
-0.030666346,
-0.04102384,
-0.03860138,
-0.08042292,
0.023227168,
0.04191858,
-0.058156747,
0.0585743,
0.076342255,
4.465569e-34,
-0.019599343,
0.040230304,
0.01455632,
0.034345042,
0.04392999,
-0.023241352,
0.067749046,
-0.03010354,
-0.09075954,
-0.019227842,
-0.027724287,
-0.00062344945,
0.0042892746,
0.053643614,
0.04075099,
0.032581333,
-0.107116826,
-0.0500636,
-0.016655827,
-0.007782394,
-0.111523,
0.07476429,
-0.016019335,
-0.050536986,
-0.11320647,
-0.0061384854,
0.050886273,
-0.030283457,
0.04318923,
0.03301474,
0.02362771,
0.046507858,
-0.03416386,
0.036145207,
0.023037339,
-0.026803765,
0.06361122,
0.09975251,
0.035269737,
0.1554014,
0.083479255,
0.10931981,
0.046847064,
-0.010136355,
-0.032541983,
0.12926093,
0.031193413,
-0.09971323,
0.010830718,
0.02325219,
-0.011917061,
0.010155018,
0.06883269,
0.009340846,
-0.022698723,
-0.042815465,
-0.048211087,
-0.085067384,
0.05105234,
0.045155898,
-0.03564869,
0.06549556,
0.048875004,
0.037915554,
-0.14071068,
-0.067095764,
0.009898252,
-0.0049653547,
-0.044304688,
0.0039006064,
-0.026903173,
-0.066124685,
0.040738244,
-0.052228633,
0.060485654,
-0.041119356,
-0.04312945,
-0.025152665,
0.08556276,
-0.044942576,
0.06393979,
-0.024227533,
-0.05052092,
-0.0020624825,
-0.078943975,
0.0026753,
0.02068896,
0.102683865,
-0.01237572,
0.056172684,
0.06552171,
0.030940128,
-0.07721113,
-0.061241012,
-0.016143149,
-1.3511957e-08,
-0.050416306,
-0.033628013,
0.046722032,
0.04744138,
-0.04411888,
0.04631675,
-0.0060847937,
-0.053873356,
0.013075445,
0.050437532,
-0.009895477,
-0.0041795173,
0.07229928,
0.021081135,
0.02672776,
-0.07482113,
-0.026757998,
0.052755926,
-0.034690056,
0.039811596,
-0.016370349,
0.045900222,
-0.02250936,
0.023861,
0.04912799,
0.09111738,
-0.0024878879,
0.049395334,
-0.03861115,
0.020867983,
0.076049894,
0.084881924,
-0.051956687,
-0.06878504,
-0.061384037,
0.077220954,
-0.06454818,
0.044513144,
0.008181126,
0.015890416,
-0.04280811,
0.005317184,
0.0034429359,
0.0031937633,
-0.013058055,
-0.09134677,
0.06425565,
-0.054977305,
0.0007087448,
-0.06258866,
-0.034974415,
-0.029966963,
0.044276785,
0.017868131,
-0.027976807,
-0.036579583,
0.021142753,
0.06057356,
-0.03133335,
-0.014331035,
0.034653842,
0.052315667,
-0.036585484,
0.028209662
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.489695-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:10.993052-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,54 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Test dimensions parameter",
"encoding_format": "float",
"dimensions": 16
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.25369987,
0.016355688,
-0.29676768,
0.316427,
-0.18642858,
0.076206245,
-0.031503417,
0.29860005,
-0.496603,
-0.36621967,
0.25334543,
-0.333392,
0.005993569,
0.14079759,
-0.13775977,
-0.14680246
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}

View file

@@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "This is the first text",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.0010839553,
0.067364,
0.015185306,
0.037240896,
0.029337138,
0.015160007,
0.0743005,
-0.0032980628,
0.06581814,
-0.021851996,
0.034412965,
0.051005766,
-0.011422501,
-0.025062356,
-0.051756065,
0.027193472,
0.07849549,
-0.05999108,
0.010471458,
-0.003400683,
0.043449093,
0.122919865,
9.668583e-05,
0.002153268,
0.018064681,
0.045069378,
-0.09762388,
0.11186886,
0.049657565,
-0.03485217,
-0.039568134,
0.003532146,
0.15894793,
0.06341193,
0.047953114,
0.011617699,
0.009799243,
0.015377702,
0.009379663,
0.079989135,
0.019207356,
-0.13718612,
0.01730099,
0.013687199,
0.014266827,
-0.00022628276,
-0.017710257,
0.02291068,
0.03590651,
-0.015361055,
-0.00978436,
-0.0401825,
-0.011481894,
0.00014050963,
0.08540761,
0.08730027,
0.0046967245,
0.006164595,
0.003031956,
0.008891807,
-0.006260525,
0.05061661,
0.0005252785,
0.0467754,
0.09363822,
-0.012814104,
0.017708639,
-0.062698044,
-0.11535818,
0.041123625,
-0.014939021,
0.044815876,
-0.020868087,
0.042999975,
-0.061038766,
0.019998673,
-0.068740115,
-0.035516046,
0.041884515,
0.012185281,
-0.029084096,
-0.06643917,
0.030638866,
0.05149607,
-0.12815061,
0.06821646,
-0.047070153,
-0.032925386,
0.007499353,
-0.017841771,
0.038296465,
-0.015792726,
0.07054022,
0.038072467,
-0.11428876,
0.04210153,
-0.11162366,
-0.045723915,
-0.028951947,
0.12735675,
-0.013946637,
-0.027157523,
0.07295939,
0.024098422,
-0.054050542,
-0.13125896,
0.03013205,
-0.023223283,
-0.019072957,
-0.007864101,
-0.021954412,
-0.05329901,
-0.07088355,
-0.0115214065,
-0.023399564,
-0.015638318,
0.05148062,
0.029261008,
0.06481798,
0.064031154,
0.014445124,
-0.058017716,
-0.069921836,
-0.023950975,
-0.08490842,
-0.08779567,
0.048162255,
-6.1240354e-33,
0.010315817,
0.038685724,
0.0031864564,
0.0357421,
0.0050265454,
-0.004210234,
-0.053900674,
-0.02988569,
-0.07548199,
-0.078777455,
-0.012271205,
-0.05056629,
0.020729113,
-0.051866043,
-0.059254467,
-0.059903424,
-0.055699438,
0.032196835,
-0.006328442,
-0.021668624,
-0.059921067,
0.0519611,
0.051227964,
-0.063502096,
-0.04873505,
-0.014265467,
0.0025537873,
-0.024346355,
-0.0055181426,
0.02007461,
-0.10196586,
0.010727814,
-0.023194604,
-0.081025146,
-0.014997581,
0.0017926424,
0.045078833,
-0.052792255,
-0.05368693,
-0.013245513,
-0.019808132,
0.020031843,
-0.00081401254,
-0.10117647,
-0.0007066768,
0.09663035,
-0.03946875,
0.04954661,
0.042237334,
0.007943922,
-0.05234212,
0.051887065,
0.03711589,
0.034850314,
0.063441575,
-0.026583876,
-0.009227281,
-0.0025737104,
-0.056082893,
0.0020716325,
-0.020129146,
0.0012315192,
-0.0017609745,
0.019111704,
0.016572498,
-0.011374,
0.010381644,
-0.007864189,
0.04664868,
-0.046856377,
-0.08523834,
-0.008974813,
0.012022968,
0.013285977,
0.015182303,
0.03708482,
0.026587088,
0.014473839,
-0.013946565,
0.01999883,
-0.06888259,
-0.07111367,
0.012369427,
0.032828625,
-0.03152666,
0.045777358,
0.06801705,
-0.07747748,
0.018461134,
0.06620267,
-0.086365156,
0.008950603,
0.041320425,
0.009541193,
0.0066037327,
4.71081e-33,
-0.026172558,
0.0013145636,
-0.014140948,
-0.024360213,
0.06931815,
0.031448748,
0.037257418,
0.06468137,
0.049403396,
0.11072201,
0.04985356,
0.06679111,
0.04153249,
-0.034106053,
0.070283465,
0.034855895,
0.12902643,
-0.021033453,
0.008940618,
0.030177405,
-0.022881329,
0.036504544,
-0.13194299,
0.045612644,
-0.0127895875,
0.04174139,
0.1232064,
-0.013484046,
-0.007285246,
-0.029776007,
0.025007037,
-0.009516822,
0.02475585,
0.023208592,
-0.019141924,
0.02259424,
0.013740329,
-0.038490705,
-0.014461541,
0.075218394,
0.13589163,
0.009839605,
-0.037563317,
-0.02737327,
-0.016485116,
-0.048845276,
-0.03523722,
-0.05439929,
-0.0017957076,
0.03563579,
-0.010255764,
-0.01859244,
-0.03647324,
-0.055985246,
-0.007833892,
0.009086756,
-0.007333394,
0.050386623,
-0.0002305643,
-0.03637248,
-0.024937423,
0.058877032,
-0.07250415,
0.07401245,
0.053917013,
-0.051895224,
-0.006332244,
0.07850189,
-0.01695057,
-0.006673017,
0.012659739,
-0.014127065,
-0.13639799,
-0.08524976,
-0.017533274,
-0.0046930755,
0.013687301,
0.0009185522,
-0.0719948,
-0.06887779,
0.14208324,
0.03187123,
-0.055919908,
0.030401653,
0.061900012,
0.029921472,
-0.00096237566,
-0.065010294,
-0.020657646,
0.039562404,
-0.123846576,
0.0028867351,
0.051196404,
0.13397509,
-0.088453874,
-1.7590333e-08,
-0.025786474,
-0.080303885,
-0.09164947,
0.031999,
0.00584884,
0.11464121,
0.023377793,
-0.06902527,
-0.055941124,
-0.05787791,
0.014640494,
0.080320895,
0.0037027278,
-0.030824674,
0.024432683,
0.008549355,
-0.05291309,
-0.06636625,
0.0007468212,
-0.02379191,
0.030766092,
0.054053318,
-0.0027251292,
-0.09928475,
-0.0150488615,
0.016240431,
-0.0015727071,
0.01190173,
0.007895162,
0.04894733,
0.00487708,
0.08263861,
-0.014527478,
-0.043879665,
0.004633697,
0.024611989,
0.023827499,
0.02366802,
0.050754935,
-0.051841788,
0.0212632,
-0.0034418616,
-0.021175656,
0.020591663,
-0.06475325,
0.0542002,
0.027792262,
-0.05295982,
0.01509645,
-0.11977527,
-0.03416359,
-0.012206606,
0.047451705,
0.020876253,
-0.026368074,
0.01502373,
0.033982284,
0.059788153,
-0.052526973,
0.03356499,
0.061180886,
0.096336305,
0.116353564,
-0.016122948
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5
}
}
},
"is_streaming": false
}
}

View file

@@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Test encoding format",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.019109152,
-0.0205217,
-0.071471564,
-0.023057504,
-0.06572786,
-0.0057331678,
-0.029395059,
-0.031822033,
-0.015748156,
-0.039123703,
0.02694331,
-0.0641754,
0.013510709,
0.050364953,
-0.03114308,
-0.08322274,
-0.03192984,
0.074970365,
-0.016377378,
-0.0013804765,
0.03850419,
-0.03441017,
-0.0048610102,
-0.03094053,
0.051915165,
0.009193639,
0.0071807485,
0.066353165,
0.024559105,
-0.04767663,
0.0376255,
-0.042586852,
0.078906916,
0.04827334,
0.13389648,
0.013978803,
0.03242126,
-0.08890431,
-0.014188366,
0.03553346,
-0.02476171,
-0.028628638,
0.047652308,
0.026259335,
0.048472118,
0.06663718,
-0.013584004,
0.071824096,
-0.073066786,
-0.050326068,
0.0039502876,
0.03300394,
-0.047816053,
-0.017657546,
0.010284664,
-0.10525716,
-0.010034394,
0.014627846,
-0.053289402,
0.060343288,
-0.10079798,
0.011359217,
-0.007258805,
0.05346498,
-0.0068726647,
0.03697505,
0.024016414,
0.023924585,
-0.011357761,
-0.119573325,
-0.115692526,
-0.06673285,
-0.04233929,
0.09302018,
0.02486003,
0.084047645,
0.0030104683,
-0.06605523,
0.027435688,
-0.032412402,
-0.025584543,
-0.06590182,
0.067799605,
0.0976311,
0.07360619,
0.034108408,
0.056534845,
0.076705806,
-0.05179011,
0.053681813,
0.0054462817,
0.015972052,
0.0035656213,
0.06333522,
-0.01597322,
0.05295729,
0.11539089,
0.055200845,
0.037667733,
0.08083974,
0.035557732,
-0.07982552,
-0.012100598,
-0.07612801,
-0.0695667,
-0.017815348,
0.16996554,
-0.0048157335,
0.09073964,
-0.07196438,
0.020009195,
-0.05956153,
-0.06312686,
-0.07716358,
0.0150949685,
-0.050339524,
-0.05444592,
-0.023078114,
-0.035431463,
-0.030625492,
-0.053284056,
-0.06745872,
-0.08049862,
0.002800386,
-0.0114065055,
-0.029938627,
0.024243163,
-1.5107368e-33,
-0.02984805,
-0.00033025863,
0.0030491,
0.023082128,
-0.04808977,
-0.0027841914,
-0.037461873,
0.016201235,
-0.02998979,
0.015712254,
0.009664366,
-0.03984875,
-0.029493092,
0.03837007,
-0.005226541,
0.06857773,
-0.007891026,
-0.0019036188,
-0.035219382,
0.03627955,
0.05867878,
0.023777487,
0.044425115,
-0.025999734,
-0.025318418,
-0.02685328,
-0.02368557,
-0.094386704,
0.0016880591,
0.0065193563,
-0.09711005,
-0.053493332,
-0.08241291,
0.023502836,
-0.02407441,
0.015992055,
0.0050546136,
0.030476829,
-0.088438906,
0.11427086,
0.028378993,
0.02985018,
0.022821706,
0.018776013,
0.056330692,
-0.020254886,
-0.00070521404,
-0.0864014,
0.020228866,
-0.0039839754,
0.0010032665,
0.065425254,
-0.036518592,
0.032341316,
0.023112345,
0.044507477,
0.09644409,
-0.07272818,
0.03370691,
0.042783204,
-0.052776046,
0.0003352446,
0.061005518,
-0.019623613,
-0.023274273,
-0.11602989,
0.007926991,
-0.12529127,
0.017030548,
0.013484081,
-0.030528491,
-0.024298145,
0.006284904,
-0.015568167,
-0.072781205,
0.012985074,
0.015977127,
0.0051657534,
-0.0026022948,
-0.059578825,
0.06372584,
-0.0019363016,
0.018695941,
-0.009242735,
-0.05887247,
-0.032524884,
-0.009591115,
-0.047377545,
0.020585002,
-0.007134836,
0.050135154,
0.016087264,
-0.0058878902,
-0.07661024,
0.0820671,
1.6053074e-33,
-0.0056476775,
0.06719423,
-0.011510322,
0.05586423,
-0.08886697,
-0.036528286,
0.12134926,
0.028969096,
0.022419011,
0.047327086,
0.07621525,
-0.07937209,
0.0020504447,
-0.023489932,
-0.029759271,
-0.04879825,
-0.034876924,
0.06461666,
0.051493492,
0.008284975,
-0.031793926,
0.098015875,
0.008122038,
0.01032072,
0.059404474,
0.05176487,
0.042960417,
0.0069373515,
0.027306866,
0.039226852,
0.062416088,
0.051797673,
0.0053232666,
0.05965781,
-0.008935817,
-0.0135501,
0.08726531,
0.028408607,
-0.006820522,
0.052098107,
0.049510423,
0.055176627,
-0.016774576,
0.077848226,
0.026121203,
0.031311177,
0.011812256,
-0.0341528,
0.052825138,
0.003484205,
0.09811821,
0.029693138,
-0.031354938,
-0.012068096,
0.018686052,
-0.032609653,
-0.09638639,
0.033928476,
-0.07897009,
-0.008300913,
-0.04915284,
0.02006342,
0.061743837,
-0.018412542,
-0.033583082,
-0.090903476,
0.021116566,
-0.022445552,
-0.011814237,
-0.048816226,
0.048287436,
-0.07294675,
-0.02198573,
0.062477604,
0.023308119,
-0.052141402,
-0.05409648,
0.062339973,
0.052301563,
0.051384836,
-0.02426406,
-0.018824687,
-0.01660311,
0.09330242,
0.008502433,
0.063408315,
0.019377569,
0.047027417,
-0.0058769877,
-0.0034505578,
0.07956527,
0.10210641,
0.015302805,
0.04089992,
0.038895626,
-1.2710905e-08,
-0.019304764,
-0.1217849,
-0.047983564,
-0.053382736,
-0.113197215,
0.05181196,
-0.10498226,
-0.08524135,
0.0061870585,
-0.029899841,
0.064561576,
-0.028730206,
-0.064735174,
-0.024887148,
0.0026119591,
-0.008796896,
0.030246036,
0.009807871,
0.0044631795,
0.0851423,
-0.026132204,
0.11360852,
-0.0045760865,
-0.036643907,
-0.09078616,
0.081466354,
0.012066122,
0.07288108,
0.004079195,
-0.05064171,
0.068772145,
0.029108258,
0.014786602,
-0.11868081,
-0.05042858,
0.05376578,
0.04570744,
0.074074544,
0.028540619,
0.03937392,
0.0291862,
-0.035710927,
-0.09132387,
-0.047720414,
-0.00082342024,
-0.073688805,
0.011024812,
0.015703982,
-0.03590976,
-0.08121826,
0.020365681,
-0.045287356,
-0.024955628,
0.001167751,
0.00037544646,
-0.026392939,
-0.032434102,
0.003407464,
-0.007060387,
0.024250468,
0.076347135,
0.039537415,
0.036043648,
-0.07085338
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}

View file

@@ -1,422 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Test user parameter",
"encoding_format": "base64",
"user": "test-user-123"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.043779343,
0.021533398,
-0.081306435,
0.010584965,
-0.079082854,
-0.03219143,
0.13092613,
0.04234389,
-0.11600539,
-0.07588513,
0.04182356,
-0.08061255,
0.038127176,
-0.010701234,
0.015768763,
-0.04193689,
0.04310592,
-0.033361685,
0.013566423,
-0.010392366,
0.015551022,
-0.037858423,
-0.050305344,
-0.025666261,
-0.047879875,
-0.087179765,
0.016856788,
-0.036765736,
0.006393739,
0.020844297,
0.11262393,
-0.002143682,
-0.07910913,
0.038748607,
0.11532516,
-0.019759571,
0.0066967797,
-0.021164352,
-0.014471563,
-0.0027048697,
-0.034388524,
-0.052571636,
-0.030607725,
0.04747725,
-0.02431059,
0.0109337615,
-0.03946421,
0.071846664,
-0.020690937,
0.01898796,
0.042931512,
-0.0077551426,
0.0025911122,
-0.058268107,
0.0117475465,
-0.022701943,
0.0017815019,
-0.012612941,
0.030724185,
0.017728312,
-0.06155491,
-0.03656162,
0.02583153,
0.02537894,
0.012139213,
0.009105951,
-0.027318193,
-0.093389414,
0.005184693,
0.007488449,
-0.07540277,
0.010159999,
-0.028444426,
0.030260745,
0.0036438918,
-0.022627153,
-0.037846327,
-0.08381657,
-0.012445195,
-0.048908208,
0.029149827,
-0.044437535,
-0.07520237,
-0.020924438,
0.06342514,
0.1629199,
0.060563333,
-0.012817673,
-0.031030292,
0.018368995,
0.11223112,
0.07292473,
-0.062686674,
-0.031803295,
-0.017489262,
0.048433464,
-0.041148387,
-0.04183779,
-0.05994369,
0.15909556,
-0.027785666,
-0.012455991,
0.056005318,
-0.019891974,
0.022063067,
0.006342065,
0.0464118,
-0.07311654,
0.033282198,
0.05949105,
-0.033307947,
0.030738499,
0.008186239,
-0.020268966,
0.056593496,
-0.081526734,
0.023390312,
0.0060836566,
-0.07992586,
0.013986445,
0.052250065,
0.027186505,
-0.049284942,
0.028148174,
0.019493744,
0.05418436,
0.0827222,
-1.8825437e-33,
0.01360945,
-0.010870715,
0.015887791,
0.069373555,
-0.051129147,
0.08999179,
0.044494778,
0.08100757,
0.018944906,
-0.020974122,
-0.017938385,
-0.021756735,
0.010972489,
0.015099965,
0.017018452,
0.094338946,
0.0034407445,
0.010244923,
-0.044709302,
0.0018059182,
0.015817573,
-0.065777056,
-0.004948138,
0.0044092103,
-0.019589791,
-0.092789896,
-0.025898295,
0.044104066,
0.0541385,
-0.007362511,
-0.021487307,
-0.036836285,
-0.09148704,
0.084001675,
-0.018094191,
0.003797567,
0.020257449,
0.04394643,
-0.0772898,
0.0057312953,
-0.054519102,
-0.024835315,
0.0753162,
0.034552757,
-0.081203006,
-0.12210961,
-0.0053012627,
0.00780717,
0.050265096,
0.015569535,
-0.056362487,
0.039800324,
0.013022089,
-0.04015537,
0.014401654,
-0.033209093,
-0.008451782,
-0.037590392,
-0.01965779,
0.01730637,
-0.00896531,
-0.0018413392,
-0.0030382746,
0.030460354,
-0.05112036,
-0.086875,
-0.018338922,
-0.11328767,
0.07325826,
0.046035297,
0.012633494,
-0.06343216,
-0.028439038,
0.020128354,
-0.07883383,
-0.00069870794,
-0.03155447,
0.12306934,
0.004300722,
-0.026421167,
0.078361824,
-0.077461444,
-0.021267027,
0.048929654,
0.02919381,
-0.0092880055,
-0.030666346,
-0.04102384,
-0.03860138,
-0.08042292,
0.023227168,
0.04191858,
-0.058156747,
0.0585743,
0.076342255,
4.465569e-34,
-0.019599343,
0.040230304,
0.01455632,
0.034345042,
0.04392999,
-0.023241352,
0.067749046,
-0.03010354,
-0.09075954,
-0.019227842,
-0.027724287,
-0.00062344945,
0.0042892746,
0.053643614,
0.04075099,
0.032581333,
-0.107116826,
-0.0500636,
-0.016655827,
-0.007782394,
-0.111523,
0.07476429,
-0.016019335,
-0.050536986,
-0.11320647,
-0.0061384854,
0.050886273,
-0.030283457,
0.04318923,
0.03301474,
0.02362771,
0.046507858,
-0.03416386,
0.036145207,
0.023037339,
-0.026803765,
0.06361122,
0.09975251,
0.035269737,
0.1554014,
0.083479255,
0.10931981,
0.046847064,
-0.010136355,
-0.032541983,
0.12926093,
0.031193413,
-0.09971323,
0.010830718,
0.02325219,
-0.011917061,
0.010155018,
0.06883269,
0.009340846,
-0.022698723,
-0.042815465,
-0.048211087,
-0.085067384,
0.05105234,
0.045155898,
-0.03564869,
0.06549556,
0.048875004,
0.037915554,
-0.14071068,
-0.067095764,
0.009898252,
-0.0049653547,
-0.044304688,
0.0039006064,
-0.026903173,
-0.066124685,
0.040738244,
-0.052228633,
0.060485654,
-0.041119356,
-0.04312945,
-0.025152665,
0.08556276,
-0.044942576,
0.06393979,
-0.024227533,
-0.05052092,
-0.0020624825,
-0.078943975,
0.0026753,
0.02068896,
0.102683865,
-0.01237572,
0.056172684,
0.06552171,
0.030940128,
-0.07721113,
-0.061241012,
-0.016143149,
-1.3511957e-08,
-0.050416306,
-0.033628013,
0.046722032,
0.04744138,
-0.04411888,
0.04631675,
-0.0060847937,
-0.053873356,
0.013075445,
0.050437532,
-0.009895477,
-0.0041795173,
0.07229928,
0.021081135,
0.02672776,
-0.07482113,
-0.026757998,
0.052755926,
-0.034690056,
0.039811596,
-0.016370349,
0.045900222,
-0.02250936,
0.023861,
0.04912799,
0.09111738,
-0.0024878879,
0.049395334,
-0.03861115,
0.020867983,
0.076049894,
0.084881924,
-0.051956687,
-0.06878504,
-0.061384037,
0.077220954,
-0.06454818,
0.044513144,
0.008181126,
0.015890416,
-0.04280811,
0.005317184,
0.0034429359,
0.0031937633,
-0.013058055,
-0.09134677,
0.06425565,
-0.054977305,
0.0007087448,
-0.06258866,
-0.034974415,
-0.029966963,
0.044276785,
0.017868131,
-0.027976807,
-0.036579583,
0.021142753,
0.06057356,
-0.03133335,
-0.014331035,
0.034653842,
0.052315667,
-0.036585484,
0.028209662
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}
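
The recording above captures an embeddings request exercising the optional user parameter with base64 encoding. For reference, a minimal client-side sketch that would produce an equivalent request; the model, input, and field values mirror the recorded body, while the client setup (base URL, placeholder API key) is an assumption:

from openai import OpenAI

# Sketch only: base_url and api_key are placeholders; model, input,
# encoding_format, and user reuse the recorded values verbatim.
client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="unused")

resp = client.embeddings.create(
    model="all-minilm:l6-v2",
    input="Test user parameter",
    encoding_format="base64",
    user="test-user-123",  # attribution field, passed through to the backend
)
print(resp.usage.prompt_tokens)  # 3 in the recording above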

View file

@@ -0,0 +1,84 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestEdgeCases::test_tool_without_schema[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Call the no args tool"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "no_args_tool",
"description": "Tool with no arguments",
"parameters": {
"type": "object",
"properties": {}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-452805c3c859",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user wants me to call the no args tool. Let me check the available functions. There's only one tool provided, which is the no_args_tool with no arguments. Since the user didn't specify any parameters, I should just return the tool call as instructed. I need to make sure the JSON is correctly formatted and within the XML tags. Alright, that's all I need.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-7a67269afe214c85924c5171612bbdbd",
"function": {
"arguments": "{}",
"name": "no_args_tool"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 101,
"prompt_tokens": 136,
"total_tokens": 237,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
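
The recording above covers the edge case of a tool whose JSON schema declares no properties. A hedged sketch of the same call against a vLLM-served Qwen3 model; the request body mirrors the recording, the client setup is assumed:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="none")  # assumed setup

resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[{"role": "user", "content": "Call the no args tool"}],
    max_tokens=4096,
    tools=[{
        "type": "function",
        "function": {
            "name": "no_args_tool",
            "description": "Tool with no arguments",
            "parameters": {"type": "object", "properties": {}},
        },
    }],
)

call = resp.choices[0].message.tool_calls[0]
# In the recording the model emits the call with empty arguments: "{}"
print(call.function.name, call.function.arguments)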

View file

@@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "This is completely different content",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.050927628,
0.038399037,
-0.05559374,
-0.105984606,
0.06944504,
-0.08054001,
-0.025946686,
-0.045175657,
0.068730615,
0.016510814,
-0.0011700827,
0.023414683,
-0.0034143464,
0.06804153,
-0.021997927,
-0.014162646,
0.12356902,
-0.06536738,
-0.082627006,
0.04300477,
-0.039514318,
0.055434275,
-0.008866895,
0.020934915,
0.016280092,
0.09630312,
-0.022835929,
0.09175565,
0.06409549,
-0.06226981,
0.010888244,
0.07833004,
0.08844764,
-0.008459277,
-0.07542651,
0.04800223,
0.0042286967,
0.037884884,
0.0023502677,
0.032233667,
0.0047689923,
-0.070404515,
-0.06513966,
0.061046362,
0.021522248,
0.10113185,
-0.07537441,
-0.04074795,
-0.0055522234,
-0.0037093374,
-0.021283673,
-0.018193243,
-0.03323253,
-0.015658593,
0.0032862085,
0.037399907,
-0.021028537,
0.052572608,
0.10211333,
-0.018634265,
0.03612266,
0.08958185,
0.050681055,
0.019839589,
0.10220134,
-0.059074707,
-0.045562137,
-0.024107283,
-0.059917513,
-0.09795064,
-0.002078402,
0.032211803,
0.04863422,
0.08062527,
0.022614514,
0.0005379622,
-0.0015465368,
0.010018953,
-0.089729026,
0.023838207,
-0.015227461,
-0.020540234,
0.08525423,
-0.08025672,
-0.002200058,
0.0649954,
-0.023069935,
-0.06201302,
-0.06545048,
-0.029986514,
0.0045501734,
0.09718718,
0.09153336,
-0.0059684636,
-0.048185453,
-0.011855243,
-0.03170323,
-0.010363732,
0.029717747,
0.103405535,
-0.029072085,
0.005597891,
-0.03075466,
-0.011073092,
-0.038647823,
-0.01590583,
0.0008562756,
0.03479237,
0.0039463183,
-0.020063022,
-0.048164852,
0.026510539,
-0.061183933,
-0.046969693,
0.02144617,
-0.048452575,
0.02205527,
0.015723849,
0.056344535,
0.055321235,
0.037136998,
-0.08872732,
0.011813868,
0.0064246035,
-0.020590257,
-0.059401207,
0.012338125,
-2.4301395e-33,
0.068363585,
-0.05303797,
0.011494271,
0.06953355,
0.013304427,
0.0020351785,
-0.020783585,
0.028951883,
0.034663863,
-0.03274387,
0.00095708756,
0.008672852,
0.007618213,
-0.024579093,
0.030253874,
-0.034167152,
-0.0315152,
0.1105276,
0.03499844,
0.045135163,
0.00044455956,
0.051429555,
0.015050582,
-0.009024664,
0.023132037,
0.05141033,
-0.00417506,
0.004720958,
-0.016197585,
-0.025692327,
-0.024077175,
-0.00953031,
0.05060433,
-0.058328744,
0.04903431,
0.07964924,
0.03599398,
-0.065374464,
-0.035382472,
-0.07028972,
-0.009750123,
-0.031909473,
-0.04101604,
-0.041144423,
-0.036323845,
0.06685511,
0.016679594,
-0.048498012,
-0.015474575,
-0.00048608257,
0.03267068,
-0.010890426,
0.016646467,
-0.057286758,
0.008073807,
0.008808943,
-0.061580453,
-0.010815387,
0.0717443,
0.08607838,
0.014073375,
0.014896061,
-0.098295614,
-0.046653833,
0.033601493,
0.0647405,
-0.007525925,
0.025440095,
0.04171436,
-0.033113986,
-0.014553822,
0.024878975,
0.045614205,
-0.042929318,
-0.040504646,
-0.06304663,
-0.022389242,
0.010583584,
-0.032525852,
-0.03146621,
0.0081922775,
0.021094568,
0.0095269885,
-0.08290188,
-0.021351986,
0.008777032,
0.060185786,
-0.062182017,
0.004518251,
0.05684528,
-0.013033095,
0.01867297,
-0.008998785,
-0.076766245,
0.051622886,
1.6926977e-33,
-0.12588808,
0.011676749,
-0.079886116,
0.02304184,
0.029238446,
0.08721121,
0.06906221,
0.032533444,
0.047794122,
0.13212898,
0.03129717,
-0.0125368,
0.0035920327,
-0.016413208,
-0.038557872,
0.016005918,
0.09166447,
0.047558285,
-0.054981478,
0.06797876,
0.017968502,
0.118666455,
-0.069318265,
0.043814093,
0.04150938,
-0.017812226,
0.051738504,
0.06795029,
0.080493495,
0.005386888,
0.08878265,
-0.036075104,
-0.07708273,
-0.09101018,
-0.09597232,
-0.0937606,
-0.06200779,
0.06722552,
-0.0006647803,
0.029067127,
0.08179574,
-0.06488274,
-0.050375167,
-0.002403243,
-0.026110265,
-0.007630271,
0.011972527,
-0.08573929,
0.04107404,
0.024723932,
-0.02222756,
-0.11560156,
0.006753066,
-0.04589066,
-0.06369223,
0.053635046,
0.005769477,
0.06325056,
0.0048679966,
-0.057087842,
0.041931894,
0.022344982,
-0.14709935,
0.026361033,
0.106274396,
-0.0059068515,
0.020035667,
0.034950804,
-0.03342695,
-0.03884034,
-0.076072656,
-0.11173452,
-0.038953967,
-0.10270519,
0.04714134,
-0.049391687,
0.074747935,
0.041724026,
-0.031083144,
0.0033830043,
0.055804495,
-0.031882074,
-0.02541756,
0.050101582,
0.035991114,
0.09143438,
-0.07581111,
-0.050589707,
0.0074097887,
-0.0014020415,
-0.05036443,
-0.0015289022,
0.005471816,
0.07689256,
0.014164922,
-1.8297508e-08,
0.029913928,
-0.057959806,
-0.06846765,
0.026196472,
-0.0035178436,
0.11374637,
0.056845777,
-0.09315407,
0.0027757618,
0.10895455,
-0.033027817,
0.005051668,
-0.043633904,
-0.048978273,
0.011912417,
0.059747256,
-0.08661686,
-0.052748058,
0.026321623,
0.042173225,
-0.0035451513,
0.03797019,
0.022595786,
-0.0614702,
0.01268269,
0.040893063,
-0.084825225,
0.041167296,
-0.038163006,
0.008364558,
0.01014753,
0.024994388,
-0.012504467,
-0.045078665,
0.0102669485,
-0.046302866,
0.061438397,
0.016235871,
-0.0011558776,
0.007455159,
-0.019448454,
-0.06798961,
0.05472832,
0.09646006,
-0.04711737,
0.060088705,
0.0030213061,
-0.08877283,
0.037262574,
-0.009947699,
0.0035697597,
-0.07833652,
0.02169359,
-0.013075168,
0.072521746,
-0.0649658,
-0.029920656,
-0.017777385,
0.033904497,
0.02919506,
0.08793891,
0.008437021,
0.064442866,
-0.01656208
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,92 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestOpenAICompatibility::test_openai_chat_completion_with_tools[openai_client-txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo?"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather information",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "City name"
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-496035259763",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking about the weather in Tokyo. I need to use the get_weather function for that. The function requires the location parameter, which in this case is Tokyo. I should make sure to specify \"Tokyo\" as the location. Let me check if there are any other parameters needed, but no, the function only needs the location. So the tool call should be straightforward. I'll format the JSON correctly inside the tool_call tags.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-959b557fa67e4134a2391f5d35e5d5ae",
"function": {
"arguments": "{\"location\": \"Tokyo\"}",
"name": "get_weather"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 117,
"prompt_tokens": 158,
"total_tokens": 275,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,92 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestOpenAICompatibility::test_openai_format_preserves_complex_schemas[openai_client-txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Process this data"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "process_data",
"description": "Process structured data",
"parameters": {
"type": "object",
"properties": {
"data": {
"$ref": "#/$defs/DataObject"
}
},
"$defs": {
"DataObject": {
"type": "object",
"properties": {
"values": {
"type": "array",
"items": {
"type": "number"
}
}
}
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-524ead18daad",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user wants me to process the data. Let me check the available tools. There's a function called process_data that takes an object with a 'data' parameter. The data is an array of numbers. But the user hasn't provided any specific data yet. They just said \"Process this data.\" Hmm, maybe they expect me to prompt them for the data first. Wait, maybe there's a misunderstanding. Did they include the data in the conversation history? Let me look back. The user's message is \"Process this data.\" No data provided. Oh, maybe they made a mistake and forgot to include it. I need to ask them to provide the data so I can proceed. Let me confirm if there's any data mentioned. No, the current input is just the instruction. So I should ask the user to supply the data array of numbers to process.\n</think>\n\nPlease provide the structured data you'd like me to process. For example, an array of numbers like `[1, 2, 3]`.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 212,
"prompt_tokens": 180,
"total_tokens": 392,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
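
The recording above verifies that $ref/$defs constructs in a tool's parameter schema pass through the OpenAI-compatible endpoint intact. A sketch of the same request shape, with the schema copied from the recorded body and the connection details assumed:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="none")  # assumed setup

tool = {
    "type": "function",
    "function": {
        "name": "process_data",
        "description": "Process structured data",
        "parameters": {
            "type": "object",
            "properties": {"data": {"$ref": "#/$defs/DataObject"}},
            "$defs": {
                "DataObject": {
                    "type": "object",
                    "properties": {
                        "values": {"type": "array", "items": {"type": "number"}},
                    },
                },
            },
        },
    },
}

resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[{"role": "user", "content": "Process this data"}],
    max_tokens=4096,
    tools=[tool],
)
# In the recording the model stops without a tool call and asks for the data,
# so finish_reason is "stop" and tool_calls is empty.
print(resp.choices[0].finish_reason)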

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.205358-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}
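
The /api/ps recordings such as the one above are Ollama process-status probes the harness captures alongside each embeddings test. A minimal sketch of the same probe, mirroring the recorded method and empty body (a local Ollama server and the requests library are assumed):

import requests

# POST with an empty JSON body, exactly as captured in the recording.
resp = requests.post("http://localhost:11434/api/ps", json={})
for m in resp.json()["models"]:
    d = m["details"]
    print(m["name"], d["parameter_size"], d["quantization_level"])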

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:11.182572-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Test encoding format",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.019109152,
-0.0205217,
-0.071471564,
-0.023057504,
-0.06572786,
-0.0057331678,
-0.029395059,
-0.031822033,
-0.015748156,
-0.039123703,
0.02694331,
-0.0641754,
0.013510709,
0.050364953,
-0.03114308,
-0.08322274,
-0.03192984,
0.074970365,
-0.016377378,
-0.0013804765,
0.03850419,
-0.03441017,
-0.0048610102,
-0.03094053,
0.051915165,
0.009193639,
0.0071807485,
0.066353165,
0.024559105,
-0.04767663,
0.0376255,
-0.042586852,
0.078906916,
0.04827334,
0.13389648,
0.013978803,
0.03242126,
-0.08890431,
-0.014188366,
0.03553346,
-0.02476171,
-0.028628638,
0.047652308,
0.026259335,
0.048472118,
0.06663718,
-0.013584004,
0.071824096,
-0.073066786,
-0.050326068,
0.0039502876,
0.03300394,
-0.047816053,
-0.017657546,
0.010284664,
-0.10525716,
-0.010034394,
0.014627846,
-0.053289402,
0.060343288,
-0.10079798,
0.011359217,
-0.007258805,
0.05346498,
-0.0068726647,
0.03697505,
0.024016414,
0.023924585,
-0.011357761,
-0.119573325,
-0.115692526,
-0.06673285,
-0.04233929,
0.09302018,
0.02486003,
0.084047645,
0.0030104683,
-0.06605523,
0.027435688,
-0.032412402,
-0.025584543,
-0.06590182,
0.067799605,
0.0976311,
0.07360619,
0.034108408,
0.056534845,
0.076705806,
-0.05179011,
0.053681813,
0.0054462817,
0.015972052,
0.0035656213,
0.06333522,
-0.01597322,
0.05295729,
0.11539089,
0.055200845,
0.037667733,
0.08083974,
0.035557732,
-0.07982552,
-0.012100598,
-0.07612801,
-0.0695667,
-0.017815348,
0.16996554,
-0.0048157335,
0.09073964,
-0.07196438,
0.020009195,
-0.05956153,
-0.06312686,
-0.07716358,
0.0150949685,
-0.050339524,
-0.05444592,
-0.023078114,
-0.035431463,
-0.030625492,
-0.053284056,
-0.06745872,
-0.08049862,
0.002800386,
-0.0114065055,
-0.029938627,
0.024243163,
-1.5107368e-33,
-0.02984805,
-0.00033025863,
0.0030491,
0.023082128,
-0.04808977,
-0.0027841914,
-0.037461873,
0.016201235,
-0.02998979,
0.015712254,
0.009664366,
-0.03984875,
-0.029493092,
0.03837007,
-0.005226541,
0.06857773,
-0.007891026,
-0.0019036188,
-0.035219382,
0.03627955,
0.05867878,
0.023777487,
0.044425115,
-0.025999734,
-0.025318418,
-0.02685328,
-0.02368557,
-0.094386704,
0.0016880591,
0.0065193563,
-0.09711005,
-0.053493332,
-0.08241291,
0.023502836,
-0.02407441,
0.015992055,
0.0050546136,
0.030476829,
-0.088438906,
0.11427086,
0.028378993,
0.02985018,
0.022821706,
0.018776013,
0.056330692,
-0.020254886,
-0.00070521404,
-0.0864014,
0.020228866,
-0.0039839754,
0.0010032665,
0.065425254,
-0.036518592,
0.032341316,
0.023112345,
0.044507477,
0.09644409,
-0.07272818,
0.03370691,
0.042783204,
-0.052776046,
0.0003352446,
0.061005518,
-0.019623613,
-0.023274273,
-0.11602989,
0.007926991,
-0.12529127,
0.017030548,
0.013484081,
-0.030528491,
-0.024298145,
0.006284904,
-0.015568167,
-0.072781205,
0.012985074,
0.015977127,
0.0051657534,
-0.0026022948,
-0.059578825,
0.06372584,
-0.0019363016,
0.018695941,
-0.009242735,
-0.05887247,
-0.032524884,
-0.009591115,
-0.047377545,
0.020585002,
-0.007134836,
0.050135154,
0.016087264,
-0.0058878902,
-0.07661024,
0.0820671,
1.6053074e-33,
-0.0056476775,
0.06719423,
-0.011510322,
0.05586423,
-0.08886697,
-0.036528286,
0.12134926,
0.028969096,
0.022419011,
0.047327086,
0.07621525,
-0.07937209,
0.0020504447,
-0.023489932,
-0.029759271,
-0.04879825,
-0.034876924,
0.06461666,
0.051493492,
0.008284975,
-0.031793926,
0.098015875,
0.008122038,
0.01032072,
0.059404474,
0.05176487,
0.042960417,
0.0069373515,
0.027306866,
0.039226852,
0.062416088,
0.051797673,
0.0053232666,
0.05965781,
-0.008935817,
-0.0135501,
0.08726531,
0.028408607,
-0.006820522,
0.052098107,
0.049510423,
0.055176627,
-0.016774576,
0.077848226,
0.026121203,
0.031311177,
0.011812256,
-0.0341528,
0.052825138,
0.003484205,
0.09811821,
0.029693138,
-0.031354938,
-0.012068096,
0.018686052,
-0.032609653,
-0.09638639,
0.033928476,
-0.07897009,
-0.008300913,
-0.04915284,
0.02006342,
0.061743837,
-0.018412542,
-0.033583082,
-0.090903476,
0.021116566,
-0.022445552,
-0.011814237,
-0.048816226,
0.048287436,
-0.07294675,
-0.02198573,
0.062477604,
0.023308119,
-0.052141402,
-0.05409648,
0.062339973,
0.052301563,
0.051384836,
-0.02426406,
-0.018824687,
-0.01660311,
0.09330242,
0.008502433,
0.063408315,
0.019377569,
0.047027417,
-0.0058769877,
-0.0034505578,
0.07956527,
0.10210641,
0.015302805,
0.04089992,
0.038895626,
-1.2710905e-08,
-0.019304764,
-0.1217849,
-0.047983564,
-0.053382736,
-0.113197215,
0.05181196,
-0.10498226,
-0.08524135,
0.0061870585,
-0.029899841,
0.064561576,
-0.028730206,
-0.064735174,
-0.024887148,
0.0026119591,
-0.008796896,
0.030246036,
0.009807871,
0.0044631795,
0.0851423,
-0.026132204,
0.11360852,
-0.0045760865,
-0.036643907,
-0.09078616,
0.081466354,
0.012066122,
0.07288108,
0.004079195,
-0.05064171,
0.068772145,
0.029108258,
0.014786602,
-0.11868081,
-0.05042858,
0.05376578,
0.04570744,
0.074074544,
0.028540619,
0.03937392,
0.0291862,
-0.035710927,
-0.09132387,
-0.047720414,
-0.00082342024,
-0.073688805,
0.011024812,
0.015703982,
-0.03590976,
-0.08121826,
0.020365681,
-0.045287356,
-0.024955628,
0.001167751,
0.00037544646,
-0.026392939,
-0.032434102,
0.003407464,
-0.007060387,
0.024250468,
0.076347135,
0.039537415,
0.036043648,
-0.07085338
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}

View file

@@ -1,54 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Test dimensions parameter",
"encoding_format": "base64",
"dimensions": 16
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.25369987,
0.016355688,
-0.29676768,
0.316427,
-0.18642858,
0.076206245,
-0.031503417,
0.29860005,
-0.496603,
-0.36621967,
0.25334543,
-0.333392,
0.005993569,
0.14079759,
-0.13775977,
-0.14680246
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 3,
"total_tokens": 3
}
}
},
"is_streaming": false
}
}
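
The recording above exercises the dimensions parameter, which truncates the returned embedding to the requested width (16 here, versus the 384 dimensions all-minilm:l6-v2 produces natively). A hedged sketch follows; the recorded request used base64 encoding, while float is used below so the vector length is directly inspectable:

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="unused")  # assumed setup

resp = client.embeddings.create(
    model="all-minilm:l6-v2",
    input="Test dimensions parameter",
    encoding_format="float",  # recording used base64; float keeps the list inspectable
    dimensions=16,
)
print(len(resp.data[0].embedding))  # 16, matching the recording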

View file

@@ -0,0 +1,98 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tools[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"max_tokens": 4096,
"stream": false,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-744052775cf9",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking about the weather in San Francisco, CA. I need to use the get_weather function. The function requires the location parameter, which is provided as San Francisco, CA. I should make sure to format the arguments correctly as a JSON object. Let me check the required parameters again. The location is required, so I can't omit it. I'll structure the tool call with the name \"get_weather\" and the arguments including \"location\": \"San Francisco, CA\". That should get the current weather information for the user.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-b59dc311dd914d3dbd6d455b122bc39c",
"function": {
"arguments": "{\"location\": \"San Francisco, CA\"}",
"name": "get_weather"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 138,
"prompt_tokens": 185,
"total_tokens": 323,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
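
The recording above is the standard tool-calling path: tool_choice "auto" with a single get_weather function. A sketch of the call and of parsing the returned arguments; the request body mirrors the recording, the client setup is assumed:

import json

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="none")  # assumed setup

resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[
        {"role": "system", "content": "Pretend you are a weather assistant."},
        {"role": "user", "content": "What's the weather like in San Francisco, CA?"},
    ],
    max_tokens=4096,
    stream=False,
    tool_choice="auto",
    tools=[{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state (both required), e.g. San Francisco, CA.",
                    },
                },
                "required": ["location"],
            },
        },
    }],
)

# Arguments arrive as a JSON string; the recording shows {"location": "San Francisco, CA"}
args = json.loads(resp.choices[0].message.tool_calls[0].function.arguments)
print(args["location"])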

View file

@@ -0,0 +1,67 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tool_choice_none[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"max_tokens": 4096,
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-77cf21828360",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking about the weather in San Francisco, CA. I need to check the current weather conditions. But wait, I can't access real-time data. I should mention that I can't provide the current weather forecast and ask them to check a reliable source like the National Weather Service or a weather app. Also, maybe suggest they can provide more details if they need help with something else related to the weather.\n</think>\n\nI'm sorry, but I can't provide real-time weather information. However, you can check the current weather for San Francisco, CA using the National Weather Service (NWS) website, weather apps like Weather.com, or local meteorological services. Let me know if there's anything else I can assist with!",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 154,
"prompt_tokens": 33,
"total_tokens": 187,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:11.451164-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:11.182572-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:10.878462-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,128 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestEdgeCases::test_multiple_tools_with_different_schemas[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Use one of the available tools"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "simple",
"parameters": {
"type": "object",
"properties": {
"x": {
"type": "string"
}
}
}
}
},
{
"type": "function",
"function": {
"name": "complex",
"parameters": {
"type": "object",
"properties": {
"data": {
"$ref": "#/$defs/Complex"
}
},
"$defs": {
"Complex": {
"type": "object",
"properties": {
"nested": {
"type": "array",
"items": {
"type": "number"
}
}
}
}
}
}
}
},
{
"type": "function",
"function": {
"name": "with_output",
"parameters": {
"type": "object",
"properties": {
"input": {
"type": "string"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-853f6a700b98",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, let's see. The user wants me to use one of the available tools. The tools provided are simple, complex, and with_output. The simple function takes an argument 'x' of type string. The complex function requires a 'data' parameter that's an object with a nested array of numbers. The with_output function takes an input string.\n\nThe user's query is about using a tool, but there's no specific function name mentioned. Wait, maybe the user expects me to choose one based on the context. Since the tools are available, but the query is a general instruction, perhaps I should ask for clarification. However, the instructions say to use one of the tools if possible. Since the user hasn't specified a particular function, maybe I should check if there's any implicit function needed. But looking at the tools, none are directly related to the query. The user might need to specify which tool to use. Alternatively, maybe the answer is to call the simple function with an example input. But without more context, it's hard to tell. Wait, maybe the user expects me to choose the simplest one. Let's go with the simple function first. So the tool call would be to the simple function with x set to some value. But the user hasn't provided a specific value. Maybe I should state that the tool requires a value. But according to the instructions, if possible, use one of the tools. Since the user hasn't given a value, perhaps the answer is to call the simple function with an example. But the parameters for the simple function require 'x' which is a string. Maybe the user expects me to proceed without needing more info. So I'll proceed by calling the simple function with x as \"example\".\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-12e2ba0189cf484bb936cbb254a5c32a",
"function": {
"arguments": "{\"x\": \"example\"}",
"name": "simple"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 378,
"prompt_tokens": 265,
"total_tokens": 643,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.320154-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:11.451164-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.489695-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:06.037921-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,103 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestMCPToolsInChatCompletion::test_mcp_tools_in_inference[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Calculate 5 + 3"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "calculate",
"description": "",
"parameters": {
"properties": {
"x": {
"title": "X",
"type": "number"
},
"y": {
"title": "Y",
"type": "number"
},
"operation": {
"title": "Operation",
"type": "string"
}
},
"required": [
"x",
"y",
"operation"
],
"title": "calculateArguments",
"type": "object"
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-99bf0054f11a",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user wants to calculate 5 plus 3. Let me check the tools provided. The only function available is 'calculate', which requires x, y, and operation. The parameters are numbers and an operation. The user input is straightforward: 5 + 3. So I need to call the 'calculate' function with x=5, y=3, and operation='+'. That should give the correct result. I don't see any other parameters needed here. Just make sure the JSON is correctly formatted with the required fields.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-6d1a92899a8246bb8fae5682dc08590c",
"function": {
"arguments": "{\"x\": 5, \"y\": 3, \"operation\": \"+\"}",
"name": "calculate"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 144,
"prompt_tokens": 193,
"total_tokens": 337,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:11.182572-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:32:10.118228-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "This is the first text",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.0010839553,
0.067364,
0.015185306,
0.037240896,
0.029337138,
0.015160007,
0.0743005,
-0.0032980628,
0.06581814,
-0.021851996,
0.034412965,
0.051005766,
-0.011422501,
-0.025062356,
-0.051756065,
0.027193472,
0.07849549,
-0.05999108,
0.010471458,
-0.003400683,
0.043449093,
0.122919865,
9.668583e-05,
0.002153268,
0.018064681,
0.045069378,
-0.09762388,
0.11186886,
0.049657565,
-0.03485217,
-0.039568134,
0.003532146,
0.15894793,
0.06341193,
0.047953114,
0.011617699,
0.009799243,
0.015377702,
0.009379663,
0.079989135,
0.019207356,
-0.13718612,
0.01730099,
0.013687199,
0.014266827,
-0.00022628276,
-0.017710257,
0.02291068,
0.03590651,
-0.015361055,
-0.00978436,
-0.0401825,
-0.011481894,
0.00014050963,
0.08540761,
0.08730027,
0.0046967245,
0.006164595,
0.003031956,
0.008891807,
-0.006260525,
0.05061661,
0.0005252785,
0.0467754,
0.09363822,
-0.012814104,
0.017708639,
-0.062698044,
-0.11535818,
0.041123625,
-0.014939021,
0.044815876,
-0.020868087,
0.042999975,
-0.061038766,
0.019998673,
-0.068740115,
-0.035516046,
0.041884515,
0.012185281,
-0.029084096,
-0.06643917,
0.030638866,
0.05149607,
-0.12815061,
0.06821646,
-0.047070153,
-0.032925386,
0.007499353,
-0.017841771,
0.038296465,
-0.015792726,
0.07054022,
0.038072467,
-0.11428876,
0.04210153,
-0.11162366,
-0.045723915,
-0.028951947,
0.12735675,
-0.013946637,
-0.027157523,
0.07295939,
0.024098422,
-0.054050542,
-0.13125896,
0.03013205,
-0.023223283,
-0.019072957,
-0.007864101,
-0.021954412,
-0.05329901,
-0.07088355,
-0.0115214065,
-0.023399564,
-0.015638318,
0.05148062,
0.029261008,
0.06481798,
0.064031154,
0.014445124,
-0.058017716,
-0.069921836,
-0.023950975,
-0.08490842,
-0.08779567,
0.048162255,
-6.1240354e-33,
0.010315817,
0.038685724,
0.0031864564,
0.0357421,
0.0050265454,
-0.004210234,
-0.053900674,
-0.02988569,
-0.07548199,
-0.078777455,
-0.012271205,
-0.05056629,
0.020729113,
-0.051866043,
-0.059254467,
-0.059903424,
-0.055699438,
0.032196835,
-0.006328442,
-0.021668624,
-0.059921067,
0.0519611,
0.051227964,
-0.063502096,
-0.04873505,
-0.014265467,
0.0025537873,
-0.024346355,
-0.0055181426,
0.02007461,
-0.10196586,
0.010727814,
-0.023194604,
-0.081025146,
-0.014997581,
0.0017926424,
0.045078833,
-0.052792255,
-0.05368693,
-0.013245513,
-0.019808132,
0.020031843,
-0.00081401254,
-0.10117647,
-0.0007066768,
0.09663035,
-0.03946875,
0.04954661,
0.042237334,
0.007943922,
-0.05234212,
0.051887065,
0.03711589,
0.034850314,
0.063441575,
-0.026583876,
-0.009227281,
-0.0025737104,
-0.056082893,
0.0020716325,
-0.020129146,
0.0012315192,
-0.0017609745,
0.019111704,
0.016572498,
-0.011374,
0.010381644,
-0.007864189,
0.04664868,
-0.046856377,
-0.08523834,
-0.008974813,
0.012022968,
0.013285977,
0.015182303,
0.03708482,
0.026587088,
0.014473839,
-0.013946565,
0.01999883,
-0.06888259,
-0.07111367,
0.012369427,
0.032828625,
-0.03152666,
0.045777358,
0.06801705,
-0.07747748,
0.018461134,
0.06620267,
-0.086365156,
0.008950603,
0.041320425,
0.009541193,
0.0066037327,
4.71081e-33,
-0.026172558,
0.0013145636,
-0.014140948,
-0.024360213,
0.06931815,
0.031448748,
0.037257418,
0.06468137,
0.049403396,
0.11072201,
0.04985356,
0.06679111,
0.04153249,
-0.034106053,
0.070283465,
0.034855895,
0.12902643,
-0.021033453,
0.008940618,
0.030177405,
-0.022881329,
0.036504544,
-0.13194299,
0.045612644,
-0.0127895875,
0.04174139,
0.1232064,
-0.013484046,
-0.007285246,
-0.029776007,
0.025007037,
-0.009516822,
0.02475585,
0.023208592,
-0.019141924,
0.02259424,
0.013740329,
-0.038490705,
-0.014461541,
0.075218394,
0.13589163,
0.009839605,
-0.037563317,
-0.02737327,
-0.016485116,
-0.048845276,
-0.03523722,
-0.05439929,
-0.0017957076,
0.03563579,
-0.010255764,
-0.01859244,
-0.03647324,
-0.055985246,
-0.007833892,
0.009086756,
-0.007333394,
0.050386623,
-0.0002305643,
-0.03637248,
-0.024937423,
0.058877032,
-0.07250415,
0.07401245,
0.053917013,
-0.051895224,
-0.006332244,
0.07850189,
-0.01695057,
-0.006673017,
0.012659739,
-0.014127065,
-0.13639799,
-0.08524976,
-0.017533274,
-0.0046930755,
0.013687301,
0.0009185522,
-0.0719948,
-0.06887779,
0.14208324,
0.03187123,
-0.055919908,
0.030401653,
0.061900012,
0.029921472,
-0.00096237566,
-0.065010294,
-0.020657646,
0.039562404,
-0.123846576,
0.0028867351,
0.051196404,
0.13397509,
-0.088453874,
-1.7590333e-08,
-0.025786474,
-0.080303885,
-0.09164947,
0.031999,
0.00584884,
0.11464121,
0.023377793,
-0.06902527,
-0.055941124,
-0.05787791,
0.014640494,
0.080320895,
0.0037027278,
-0.030824674,
0.024432683,
0.008549355,
-0.05291309,
-0.06636625,
0.0007468212,
-0.02379191,
0.030766092,
0.054053318,
-0.0027251292,
-0.09928475,
-0.0150488615,
0.016240431,
-0.0015727071,
0.01190173,
0.007895162,
0.04894733,
0.00487708,
0.08263861,
-0.014527478,
-0.043879665,
0.004633697,
0.024611989,
0.023827499,
0.02366802,
0.050754935,
-0.051841788,
0.0212632,
-0.0034418616,
-0.021175656,
0.020591663,
-0.06475325,
0.0542002,
0.027792262,
-0.05295982,
0.01509645,
-0.11977527,
-0.03416359,
-0.012206606,
0.047451705,
0.020876253,
-0.026368074,
0.01502373,
0.033982284,
0.059788153,
-0.052526973,
0.03356499,
0.061180886,
0.096336305,
0.116353564,
-0.016122948
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5
}
}
},
"is_streaming": false
}
}
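
This recording pairs an embeddings request with its full float vector. Ollama serves an OpenAI-compatible API under `/v1` (the doubled `/v1/v1` in the stored URL is the recorder concatenating the client's base URL with the endpoint path), so the stock `openai` client reproduces it. A sketch, with the API key as a placeholder since the local server ignores it:

```python
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="unused")

resp = client.embeddings.create(
    model="all-minilm:l6-v2",
    input="This is the first text",
    encoding_format="float",
)
vec = resp.data[0].embedding
# all-MiniLM-L6-v2 produces 384-dimensional vectors; the recording above
# reports 5 prompt tokens for this input.
print(len(vec), resp.usage.prompt_tokens)
```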

View file

@ -1,57 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.395473-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,114 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestChatCompletionWithTools::test_tool_with_complex_schema[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "Book a flight from SFO to JFK for John Doe"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "book_flight",
"description": "Book a flight",
"parameters": {
"type": "object",
"properties": {
"flight": {
"$ref": "#/$defs/FlightInfo"
},
"passenger": {
"$ref": "#/$defs/Passenger"
}
},
"required": [
"flight",
"passenger"
],
"$defs": {
"FlightInfo": {
"type": "object",
"properties": {
"from": {
"type": "string"
},
"to": {
"type": "string"
},
"date": {
"type": "string",
"format": "date"
}
}
},
"Passenger": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"age": {
"type": "integer"
}
}
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-cfb292c0f41d",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user wants to book a flight from SFO to JFK for John Doe. Let me check the tools available. The provided function is book_flight, which requires flight information and a passenger. The parameters needed are flight (as a FlightInfo object) and passenger (with name and age). The user mentioned SFO to JFK, so the flight details are from and to. The passenger's name is John Doe, but the age isn't provided. Wait, the function parameters require the passenger's name and age, but the user only mentioned the name. Maybe the age is missing? But the user didn't specify it, so perhaps I should note that the age is required. However, the function's required parameters are flight and passenger, so even if age is missing, the function can't be called without it. So I need to include both flight info and passenger details. The user's message only gives the name and destination, not the flight details or age. Therefore, I need to ask for the flight details and the passenger's age. But the user hasn't provided those. So I can't proceed with the function call. Wait, but maybe the user expects me to assume some default? No, the function requires all parameters. Since the user hasn't provided flight details or age, I can't call the function. So the correct response is to prompt the user for those details.\n</think>\n\nThe booking requires the flight details and passenger's age. Could you provide the flight number and John Doe's age?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 310,
"prompt_tokens": 261,
"total_tokens": 571,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
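
This new vLLM recording exercises a tool whose parameter schema uses JSON Schema `$defs`/`$ref` indirection; note that the recorded model declined to call the tool and answered in prose instead, so the test presumably has to tolerate both outcomes. A sketch of issuing the recorded request with the `openai` client (endpoint and schema copied from the recording; the client setup is an assumption):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

book_flight = {
    "type": "function",
    "function": {
        "name": "book_flight",
        "description": "Book a flight",
        "parameters": {
            "type": "object",
            "properties": {
                "flight": {"$ref": "#/$defs/FlightInfo"},
                "passenger": {"$ref": "#/$defs/Passenger"},
            },
            "required": ["flight", "passenger"],
            "$defs": {
                "FlightInfo": {
                    "type": "object",
                    "properties": {
                        "from": {"type": "string"},
                        "to": {"type": "string"},
                        "date": {"type": "string", "format": "date"},
                    },
                },
                "Passenger": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "age": {"type": "integer"},
                    },
                },
            },
        },
    },
}

resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[{"role": "user", "content": "Book a flight from SFO to JFK for John Doe"}],
    max_tokens=4096,
    tools=[book_flight],
)
msg = resp.choices[0].message
# The recorded run returned no tool_calls and a plain-text follow-up
# question, so handle both branches.
print(msg.tool_calls or msg.content)
```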

View file

@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Hello, world!",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.038168654,
0.032873917,
-0.0055947267,
0.014366432,
-0.040310103,
-0.116643615,
0.031721067,
0.0019260457,
-0.04255802,
0.029198613,
0.04252229,
0.032184314,
0.029838374,
0.010959321,
-0.053805783,
-0.05028783,
-0.023449864,
0.0107550435,
-0.13774979,
0.0039929547,
0.029302042,
0.066712305,
-0.015410682,
0.048422653,
-0.08814465,
-0.012715775,
0.041334823,
0.040851083,
-0.050064698,
-0.05804616,
0.048728727,
0.06888658,
0.058795262,
0.008804153,
-0.016073612,
0.08514259,
-0.078146815,
-0.07741974,
0.020842256,
0.016201088,
0.032518543,
-0.05346469,
-0.062197812,
-0.024271712,
0.007416788,
0.024103774,
0.006469804,
0.051166162,
0.07284196,
0.034627657,
-0.05475476,
-0.059386417,
-0.0071934434,
0.020163197,
0.035816014,
0.0055927313,
0.010762318,
-0.05274177,
0.010083032,
-0.008742163,
-0.06284565,
0.038426206,
-0.013933317,
0.07342759,
0.09004579,
-0.07995627,
-0.016420787,
0.044767782,
-0.06886435,
-0.03303916,
-0.015482072,
0.011322529,
0.036461752,
0.066346884,
-0.05434455,
0.008740993,
0.012066104,
-0.038101126,
0.0069316486,
0.051146947,
0.07740579,
-0.122950904,
0.016380342,
0.049568996,
0.031634904,
-0.039637603,
0.0016715266,
0.009577405,
-0.032646418,
-0.033988595,
-0.13329837,
0.0072566303,
-0.010266605,
0.038557075,
-0.09338859,
-0.041706774,
0.069941126,
-0.026323376,
-0.14971305,
0.13445398,
0.03748492,
0.052825302,
0.0450506,
0.018712776,
0.05444322,
0.017282845,
-0.032480195,
0.04614526,
-0.046711974,
-0.030566413,
-0.01820007,
-0.04869831,
0.033051647,
-0.0038142777,
0.04999665,
-0.058270358,
-0.010011706,
0.010643473,
-0.040113144,
-0.0015507729,
0.060854245,
-0.045562096,
0.049257778,
0.02612153,
0.01981428,
-0.001660993,
0.059509434,
-6.525298e-33,
0.063519135,
0.0030875143,
0.028961418,
0.1733713,
0.0029763067,
0.027727291,
-0.0951315,
-0.031186627,
0.026689058,
-0.010807322,
0.023850724,
0.023777472,
-0.031174092,
0.049501278,
-0.025049716,
0.10175924,
-0.07919064,
-0.0032249284,
0.042915843,
0.09483459,
-0.06652636,
0.006303593,
0.02220902,
0.06999181,
-0.0074810013,
-0.0017734945,
0.027008688,
-0.07534615,
0.114036545,
0.008552313,
-0.023737878,
-0.04694563,
0.014472103,
0.019855395,
-0.0046694353,
0.0013555645,
-0.034298304,
-0.054142635,
-0.09419824,
-0.028909719,
-0.018876282,
0.0457315,
0.04761082,
-0.0030971593,
-0.033264168,
-0.013539523,
0.051041685,
0.031110944,
0.015244497,
0.054158635,
-0.08499706,
0.013360703,
-0.04759633,
0.07101136,
-0.0131114535,
-0.0023818254,
0.050331973,
-0.041642286,
-0.01419894,
0.032463223,
0.0053973934,
0.091275506,
0.0044798073,
-0.018260129,
-0.015278888,
-0.046306957,
0.038750377,
0.014729783,
0.05204642,
0.0017938613,
-0.014963651,
0.027101943,
0.031203475,
0.023725478,
-0.004601222,
0.03617344,
0.06679477,
-0.0018401983,
0.021265576,
-0.057589985,
0.019155758,
0.031437635,
-0.018444614,
-0.04085069,
0.10393101,
0.011960795,
-0.014898805,
-0.10520497,
-0.012302656,
-0.00043837292,
-0.09508398,
0.058318105,
0.042576887,
-0.025066672,
-0.094555676,
4.0072287e-33,
0.1322281,
0.0053512393,
-0.03312536,
-0.09096454,
-0.031562407,
-0.033949774,
-0.07205118,
0.1259232,
-0.08333555,
0.052797858,
0.001077506,
0.022004265,
0.10402767,
0.013034249,
0.04091762,
0.018705815,
0.11424037,
0.024799824,
0.014582492,
0.006205516,
-0.011202356,
-0.035756435,
-0.03800272,
0.011251353,
-0.0512988,
0.007890417,
0.06736164,
0.0033359542,
-0.09285096,
0.03704081,
-0.022326592,
0.039967872,
-0.030748183,
-0.011446819,
-0.014453254,
0.02498229,
-0.097532175,
-0.035378877,
-0.03757795,
-0.010181498,
-0.06392041,
0.025538994,
0.02061816,
0.03757256,
-0.1043548,
-0.028326731,
-0.05209465,
0.0128473425,
-0.051238894,
-0.029034877,
-0.09633617,
-0.042309195,
0.067165054,
-0.030870603,
-0.010357507,
0.027381465,
-0.028105576,
0.010302046,
0.04306986,
0.022315372,
0.007954779,
0.056068663,
0.04071972,
0.09293905,
0.016536433,
-0.053764775,
0.00047211433,
0.050708972,
0.042510226,
-0.029195962,
0.009274875,
-0.010647389,
-0.037209682,
0.002267011,
-0.030304702,
0.0745741,
0.0026207205,
-0.017582772,
0.0028797672,
0.038404796,
0.00723137,
0.045613218,
0.03998252,
0.014209623,
-0.0142997475,
0.05850862,
0.03630791,
0.055294298,
-0.020075988,
-0.08041808,
-0.030250112,
-0.014920701,
0.022349516,
0.011911506,
-0.06903851,
-1.8806734e-08,
-0.078480355,
0.046674173,
-0.023920896,
0.0634942,
0.02396477,
0.0014517035,
-0.090798445,
-0.06684978,
-0.0801405,
0.005503192,
0.053675175,
0.104841895,
-0.066848256,
0.015522683,
0.067097165,
0.070832625,
-0.03197915,
0.020843629,
-0.0219202,
-0.0073016756,
-0.010645817,
0.0040983153,
0.03313765,
-0.0790081,
0.03878132,
-0.075230986,
-0.015732396,
0.0060099233,
0.0051297406,
-0.061492138,
0.04202211,
0.09544608,
-0.04318599,
0.014424486,
-0.10617826,
-0.027963417,
0.011034413,
0.069576606,
0.06689785,
-0.07479674,
-0.07851099,
0.042766396,
-0.034639932,
-0.10607304,
-0.03577663,
0.051540814,
0.068673156,
-0.049959548,
0.015460458,
-0.064520314,
-0.076010585,
0.026035817,
0.07440218,
-0.012396022,
0.13329679,
0.074770845,
0.05134284,
0.020977058,
-0.026776016,
0.08894323,
0.039937407,
-0.04102053,
0.03194075,
0.018113315
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 4,
"total_tokens": 4
}
}
},
"is_streaming": false
}
}

View file

@ -1,48 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"prompt": "I am feeling really sad today.",
"stream": false
},
"endpoint": "/v1/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "rec-d2ba309413e8",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": " I have been working on a project that I feel like I'm not doing well",
"stop_reason": null,
"prompt_logprobs": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 16,
"prompt_tokens": 7,
"total_tokens": 23,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"service_tier": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "Hello, world!",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.038168654,
0.032873917,
-0.0055947267,
0.014366432,
-0.040310103,
-0.116643615,
0.031721067,
0.0019260457,
-0.04255802,
0.029198613,
0.04252229,
0.032184314,
0.029838374,
0.010959321,
-0.053805783,
-0.05028783,
-0.023449864,
0.0107550435,
-0.13774979,
0.0039929547,
0.029302042,
0.066712305,
-0.015410682,
0.048422653,
-0.08814465,
-0.012715775,
0.041334823,
0.040851083,
-0.050064698,
-0.05804616,
0.048728727,
0.06888658,
0.058795262,
0.008804153,
-0.016073612,
0.08514259,
-0.078146815,
-0.07741974,
0.020842256,
0.016201088,
0.032518543,
-0.05346469,
-0.062197812,
-0.024271712,
0.007416788,
0.024103774,
0.006469804,
0.051166162,
0.07284196,
0.034627657,
-0.05475476,
-0.059386417,
-0.0071934434,
0.020163197,
0.035816014,
0.0055927313,
0.010762318,
-0.05274177,
0.010083032,
-0.008742163,
-0.06284565,
0.038426206,
-0.013933317,
0.07342759,
0.09004579,
-0.07995627,
-0.016420787,
0.044767782,
-0.06886435,
-0.03303916,
-0.015482072,
0.011322529,
0.036461752,
0.066346884,
-0.05434455,
0.008740993,
0.012066104,
-0.038101126,
0.0069316486,
0.051146947,
0.07740579,
-0.122950904,
0.016380342,
0.049568996,
0.031634904,
-0.039637603,
0.0016715266,
0.009577405,
-0.032646418,
-0.033988595,
-0.13329837,
0.0072566303,
-0.010266605,
0.038557075,
-0.09338859,
-0.041706774,
0.069941126,
-0.026323376,
-0.14971305,
0.13445398,
0.03748492,
0.052825302,
0.0450506,
0.018712776,
0.05444322,
0.017282845,
-0.032480195,
0.04614526,
-0.046711974,
-0.030566413,
-0.01820007,
-0.04869831,
0.033051647,
-0.0038142777,
0.04999665,
-0.058270358,
-0.010011706,
0.010643473,
-0.040113144,
-0.0015507729,
0.060854245,
-0.045562096,
0.049257778,
0.02612153,
0.01981428,
-0.001660993,
0.059509434,
-6.525298e-33,
0.063519135,
0.0030875143,
0.028961418,
0.1733713,
0.0029763067,
0.027727291,
-0.0951315,
-0.031186627,
0.026689058,
-0.010807322,
0.023850724,
0.023777472,
-0.031174092,
0.049501278,
-0.025049716,
0.10175924,
-0.07919064,
-0.0032249284,
0.042915843,
0.09483459,
-0.06652636,
0.006303593,
0.02220902,
0.06999181,
-0.0074810013,
-0.0017734945,
0.027008688,
-0.07534615,
0.114036545,
0.008552313,
-0.023737878,
-0.04694563,
0.014472103,
0.019855395,
-0.0046694353,
0.0013555645,
-0.034298304,
-0.054142635,
-0.09419824,
-0.028909719,
-0.018876282,
0.0457315,
0.04761082,
-0.0030971593,
-0.033264168,
-0.013539523,
0.051041685,
0.031110944,
0.015244497,
0.054158635,
-0.08499706,
0.013360703,
-0.04759633,
0.07101136,
-0.0131114535,
-0.0023818254,
0.050331973,
-0.041642286,
-0.01419894,
0.032463223,
0.0053973934,
0.091275506,
0.0044798073,
-0.018260129,
-0.015278888,
-0.046306957,
0.038750377,
0.014729783,
0.05204642,
0.0017938613,
-0.014963651,
0.027101943,
0.031203475,
0.023725478,
-0.004601222,
0.03617344,
0.06679477,
-0.0018401983,
0.021265576,
-0.057589985,
0.019155758,
0.031437635,
-0.018444614,
-0.04085069,
0.10393101,
0.011960795,
-0.014898805,
-0.10520497,
-0.012302656,
-0.00043837292,
-0.09508398,
0.058318105,
0.042576887,
-0.025066672,
-0.094555676,
4.0072287e-33,
0.1322281,
0.0053512393,
-0.03312536,
-0.09096454,
-0.031562407,
-0.033949774,
-0.07205118,
0.1259232,
-0.08333555,
0.052797858,
0.001077506,
0.022004265,
0.10402767,
0.013034249,
0.04091762,
0.018705815,
0.11424037,
0.024799824,
0.014582492,
0.006205516,
-0.011202356,
-0.035756435,
-0.03800272,
0.011251353,
-0.0512988,
0.007890417,
0.06736164,
0.0033359542,
-0.09285096,
0.03704081,
-0.022326592,
0.039967872,
-0.030748183,
-0.011446819,
-0.014453254,
0.02498229,
-0.097532175,
-0.035378877,
-0.03757795,
-0.010181498,
-0.06392041,
0.025538994,
0.02061816,
0.03757256,
-0.1043548,
-0.028326731,
-0.05209465,
0.0128473425,
-0.051238894,
-0.029034877,
-0.09633617,
-0.042309195,
0.067165054,
-0.030870603,
-0.010357507,
0.027381465,
-0.028105576,
0.010302046,
0.04306986,
0.022315372,
0.007954779,
0.056068663,
0.04071972,
0.09293905,
0.016536433,
-0.053764775,
0.00047211433,
0.050708972,
0.042510226,
-0.029195962,
0.009274875,
-0.010647389,
-0.037209682,
0.002267011,
-0.030304702,
0.0745741,
0.0026207205,
-0.017582772,
0.0028797672,
0.038404796,
0.00723137,
0.045613218,
0.03998252,
0.014209623,
-0.0142997475,
0.05850862,
0.03630791,
0.055294298,
-0.020075988,
-0.08041808,
-0.030250112,
-0.014920701,
0.022349516,
0.011911506,
-0.06903851,
-1.8806734e-08,
-0.078480355,
0.046674173,
-0.023920896,
0.0634942,
0.02396477,
0.0014517035,
-0.090798445,
-0.06684978,
-0.0801405,
0.005503192,
0.053675175,
0.104841895,
-0.066848256,
0.015522683,
0.067097165,
0.070832625,
-0.03197915,
0.020843629,
-0.0219202,
-0.0073016756,
-0.010645817,
0.0040983153,
0.03313765,
-0.0790081,
0.03878132,
-0.075230986,
-0.015732396,
0.0060099233,
0.0051297406,
-0.061492138,
0.04202211,
0.09544608,
-0.04318599,
0.014424486,
-0.10617826,
-0.027963417,
0.011034413,
0.069576606,
0.06689785,
-0.07479674,
-0.07851099,
0.042766396,
-0.034639932,
-0.10607304,
-0.03577663,
0.051540814,
0.068673156,
-0.049959548,
0.015460458,
-0.064520314,
-0.076010585,
0.026035817,
0.07440218,
-0.012396022,
0.13329679,
0.074770845,
0.05134284,
0.020977058,
-0.026776016,
0.08894323,
0.039937407,
-0.04102053,
0.03194075,
0.018113315
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 4,
"total_tokens": 4
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,96 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_structured_output[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:structured_output]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant. Michael Jordan was born in 1963. His first name is \"Michael\", He played basketball for the Chicago Bulls for 15 seasons and was drafted in 1984"
},
{
"role": "user",
"content": "Please give me information about Michael Jordan."
}
],
"max_tokens": 4096,
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "AnswerFormat",
"schema": {
"properties": {
"first_name": {
"title": "First Name",
"type": "string"
},
"last_name": {
"title": "Last Name",
"type": "string"
},
"year_of_birth": {
"title": "Year Of Birth",
"type": "integer"
}
},
"required": [
"first_name",
"last_name",
"year_of_birth"
],
"title": "AnswerFormat",
"type": "object"
}
}
},
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-df353403c7fb",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 28,
"prompt_tokens": 66,
"total_tokens": 94,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
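
This recording covers structured output: the request pins a JSON Schema via `response_format`, and the model returns a conforming JSON string in `message.content`. A sketch of the same call (schema and messages abridged from the recording; client setup assumed as before):

```python
import json

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

answer_schema = {
    "title": "AnswerFormat",
    "type": "object",
    "properties": {
        "first_name": {"title": "First Name", "type": "string"},
        "last_name": {"title": "Last Name", "type": "string"},
        "year_of_birth": {"title": "Year Of Birth", "type": "integer"},
    },
    "required": ["first_name", "last_name", "year_of_birth"],
}

resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[
        {"role": "system", "content": "You are a helpful assistant. Michael Jordan was born in 1963. ..."},
        {"role": "user", "content": "Please give me information about Michael Jordan."},
    ],
    max_tokens=4096,
    response_format={
        "type": "json_schema",
        "json_schema": {"name": "AnswerFormat", "schema": answer_schema},
    },
    stream=False,
)
# Recorded content: {"first_name": "Michael", "last_name": "Jordan", "year_of_birth": 1963}
print(json.loads(resp.choices[0].message.content))
```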

View file

@ -1,54 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"prompt": "I am feeling really sad today.",
"stream": false,
"extra_body": {
"guided_choices": [
"joy",
"sadness"
]
}
},
"endpoint": "/v1/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "rec-e3727f6c749a",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": " I feel that I am not good enough, and I feel like I have no",
"stop_reason": null,
"prompt_logprobs": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 16,
"prompt_tokens": 7,
"total_tokens": 23,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"service_tier": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
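
This deleted recording is the "before" state of the guided-choice test: the request carries `extra_body.guided_choices`, yet the stored completion is still free-running text cut off at 16 tokens. Elsewhere in this diff, a retained recording of what appears to be the same test is updated from "sadness" to "joy", i.e. properly constrained output. A sketch of the request as recorded (the `guided_choices` field name is taken verbatim from the recording; whether the backend honors it is exactly what the test probes):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

resp = client.completions.create(
    model="Qwen/Qwen3-0.6B",
    prompt="I am feeling really sad today.",
    stream=False,
    # extra_body merges these keys into the request JSON, carrying
    # server-specific extensions the openai client doesn't model.
    extra_body={"guided_choices": ["joy", "sadness"]},
)
print(resp.choices[0].text)  # constrained run: "joy" or "sadness"
```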

View file

@ -1,421 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/embeddings",
"headers": {},
"body": {
"model": "all-minilm:l6-v2",
"input": "This is completely different content",
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "all-minilm:l6-v2"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
0.050927628,
0.038399037,
-0.05559374,
-0.105984606,
0.06944504,
-0.08054001,
-0.025946686,
-0.045175657,
0.068730615,
0.016510814,
-0.0011700827,
0.023414683,
-0.0034143464,
0.06804153,
-0.021997927,
-0.014162646,
0.12356902,
-0.06536738,
-0.082627006,
0.04300477,
-0.039514318,
0.055434275,
-0.008866895,
0.020934915,
0.016280092,
0.09630312,
-0.022835929,
0.09175565,
0.06409549,
-0.06226981,
0.010888244,
0.07833004,
0.08844764,
-0.008459277,
-0.07542651,
0.04800223,
0.0042286967,
0.037884884,
0.0023502677,
0.032233667,
0.0047689923,
-0.070404515,
-0.06513966,
0.061046362,
0.021522248,
0.10113185,
-0.07537441,
-0.04074795,
-0.0055522234,
-0.0037093374,
-0.021283673,
-0.018193243,
-0.03323253,
-0.015658593,
0.0032862085,
0.037399907,
-0.021028537,
0.052572608,
0.10211333,
-0.018634265,
0.03612266,
0.08958185,
0.050681055,
0.019839589,
0.10220134,
-0.059074707,
-0.045562137,
-0.024107283,
-0.059917513,
-0.09795064,
-0.002078402,
0.032211803,
0.04863422,
0.08062527,
0.022614514,
0.0005379622,
-0.0015465368,
0.010018953,
-0.089729026,
0.023838207,
-0.015227461,
-0.020540234,
0.08525423,
-0.08025672,
-0.002200058,
0.0649954,
-0.023069935,
-0.06201302,
-0.06545048,
-0.029986514,
0.0045501734,
0.09718718,
0.09153336,
-0.0059684636,
-0.048185453,
-0.011855243,
-0.03170323,
-0.010363732,
0.029717747,
0.103405535,
-0.029072085,
0.005597891,
-0.03075466,
-0.011073092,
-0.038647823,
-0.01590583,
0.0008562756,
0.03479237,
0.0039463183,
-0.020063022,
-0.048164852,
0.026510539,
-0.061183933,
-0.046969693,
0.02144617,
-0.048452575,
0.02205527,
0.015723849,
0.056344535,
0.055321235,
0.037136998,
-0.08872732,
0.011813868,
0.0064246035,
-0.020590257,
-0.059401207,
0.012338125,
-2.4301395e-33,
0.068363585,
-0.05303797,
0.011494271,
0.06953355,
0.013304427,
0.0020351785,
-0.020783585,
0.028951883,
0.034663863,
-0.03274387,
0.00095708756,
0.008672852,
0.007618213,
-0.024579093,
0.030253874,
-0.034167152,
-0.0315152,
0.1105276,
0.03499844,
0.045135163,
0.00044455956,
0.051429555,
0.015050582,
-0.009024664,
0.023132037,
0.05141033,
-0.00417506,
0.004720958,
-0.016197585,
-0.025692327,
-0.024077175,
-0.00953031,
0.05060433,
-0.058328744,
0.04903431,
0.07964924,
0.03599398,
-0.065374464,
-0.035382472,
-0.07028972,
-0.009750123,
-0.031909473,
-0.04101604,
-0.041144423,
-0.036323845,
0.06685511,
0.016679594,
-0.048498012,
-0.015474575,
-0.00048608257,
0.03267068,
-0.010890426,
0.016646467,
-0.057286758,
0.008073807,
0.008808943,
-0.061580453,
-0.010815387,
0.0717443,
0.08607838,
0.014073375,
0.014896061,
-0.098295614,
-0.046653833,
0.033601493,
0.0647405,
-0.007525925,
0.025440095,
0.04171436,
-0.033113986,
-0.014553822,
0.024878975,
0.045614205,
-0.042929318,
-0.040504646,
-0.06304663,
-0.022389242,
0.010583584,
-0.032525852,
-0.03146621,
0.0081922775,
0.021094568,
0.0095269885,
-0.08290188,
-0.021351986,
0.008777032,
0.060185786,
-0.062182017,
0.004518251,
0.05684528,
-0.013033095,
0.01867297,
-0.008998785,
-0.076766245,
0.051622886,
1.6926977e-33,
-0.12588808,
0.011676749,
-0.079886116,
0.02304184,
0.029238446,
0.08721121,
0.06906221,
0.032533444,
0.047794122,
0.13212898,
0.03129717,
-0.0125368,
0.0035920327,
-0.016413208,
-0.038557872,
0.016005918,
0.09166447,
0.047558285,
-0.054981478,
0.06797876,
0.017968502,
0.118666455,
-0.069318265,
0.043814093,
0.04150938,
-0.017812226,
0.051738504,
0.06795029,
0.080493495,
0.005386888,
0.08878265,
-0.036075104,
-0.07708273,
-0.09101018,
-0.09597232,
-0.0937606,
-0.06200779,
0.06722552,
-0.0006647803,
0.029067127,
0.08179574,
-0.06488274,
-0.050375167,
-0.002403243,
-0.026110265,
-0.007630271,
0.011972527,
-0.08573929,
0.04107404,
0.024723932,
-0.02222756,
-0.11560156,
0.006753066,
-0.04589066,
-0.06369223,
0.053635046,
0.005769477,
0.06325056,
0.0048679966,
-0.057087842,
0.041931894,
0.022344982,
-0.14709935,
0.026361033,
0.106274396,
-0.0059068515,
0.020035667,
0.034950804,
-0.03342695,
-0.03884034,
-0.076072656,
-0.11173452,
-0.038953967,
-0.10270519,
0.04714134,
-0.049391687,
0.074747935,
0.041724026,
-0.031083144,
0.0033830043,
0.055804495,
-0.031882074,
-0.02541756,
0.050101582,
0.035991114,
0.09143438,
-0.07581111,
-0.050589707,
0.0074097887,
-0.0014020415,
-0.05036443,
-0.0015289022,
0.005471816,
0.07689256,
0.014164922,
-1.8297508e-08,
0.029913928,
-0.057959806,
-0.06846765,
0.026196472,
-0.0035178436,
0.11374637,
0.056845777,
-0.09315407,
0.0027757618,
0.10895455,
-0.033027817,
0.005051668,
-0.043633904,
-0.048978273,
0.011912417,
0.059747256,
-0.08661686,
-0.052748058,
0.026321623,
0.042173225,
-0.0035451513,
0.03797019,
0.022595786,
-0.0614702,
0.01268269,
0.040893063,
-0.084825225,
0.041167296,
-0.038163006,
0.008364558,
0.01014753,
0.024994388,
-0.012504467,
-0.045078665,
0.0102669485,
-0.046302866,
0.061438397,
0.016235871,
-0.0011558776,
0.007455159,
-0.019448454,
-0.06798961,
0.05472832,
0.09646006,
-0.04711737,
0.060088705,
0.0030213061,
-0.08877283,
0.037262574,
-0.009947699,
0.0035697597,
-0.07833652,
0.02169359,
-0.013075168,
0.072521746,
-0.0649658,
-0.029920656,
-0.017777385,
0.033904497,
0.02919506,
0.08793891,
0.008437021,
0.064442866,
-0.01656208
],
"index": 0,
"object": "embedding"
}
],
"model": "all-minilm:l6-v2",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,92 @@
{
"test_id": "tests/integration/inference/test_tools_with_schemas.py::TestChatCompletionWithTools::test_simple_tool_call[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "user",
"content": "What's the weather in San Francisco?"
}
],
"max_tokens": 4096,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "City name"
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "Qwen/Qwen3-0.6B"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-e89112e7735f",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "<think>\nOkay, the user is asking for the weather in San Francisco. I need to check if there's a function available for that. Looking at the tools provided, there's a function called get_weather that requires a location parameter. The description says it gets weather for a location, and the parameter is the city name. The user provided \"San Francisco\" as the location, so I should call the get_weather function with \"San Francisco\" as the argument. I don't see any other parameters needed here, so the tool call should be straightforward. Just make sure the city name is correctly formatted in JSON.\n</think>\n\n",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "chatcmpl-tool-feead29842dc40b2831c41ed397f555f",
"function": {
"arguments": "{\"location\": \"San Francisco\"}",
"name": "get_weather"
},
"type": "function"
}
],
"reasoning_content": null
},
"stop_reason": null
}
],
"created": 0,
"model": "Qwen/Qwen3-0.6B",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 146,
"prompt_tokens": 161,
"total_tokens": 307,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"prompt_logprobs": null,
"kv_transfer_params": null
}
},
"is_streaming": false
},
"id_normalization_mapping": {}
}
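
Here the model does call the tool: the server returns a structured `tool_calls` entry alongside the `<think>` preamble left in `content`, and `finish_reason` flips to `"tool_calls"`. A sketch of consuming such a response (request fields from the recording; the argument-decoding step is the part worth copying, since arguments arrive as a JSON string rather than a dict):

```python
import json

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[{"role": "user", "content": "What's the weather in San Francisco?"}],
    max_tokens=4096,
    tools=[{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather for a location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string", "description": "City name"}},
                "required": ["location"],
            },
        },
    }],
)

for call in resp.choices[0].message.tool_calls or []:
    # Arguments are a JSON string, e.g. '{"location": "San Francisco"}'.
    args = json.loads(call.function.arguments)
    print(call.function.name, args)
```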

View file

@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.079436-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -28,7 +28,7 @@
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "sadness",
"text": "joy",
"stop_reason": null,
"prompt_logprobs": null
}
@ -38,13 +38,12 @@
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 3,
"completion_tokens": 2,
"prompt_tokens": 7,
"total_tokens": 10,
"total_tokens": 9,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"service_tier": null,
"kv_transfer_params": null
}
},

View file

@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:10.779723-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[openai_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:35:05.489695-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-10-08T11:35:04.346635-07:00",
"size": 7919570944,
"size_vram": 7919570944,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -1,75 +0,0 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-emb=ollama/all-minilm:l6-v2]",
"request": {
"method": "POST",
"url": "http://localhost:11434/api/ps",
"headers": {},
"body": {},
"endpoint": "/api/ps",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ProcessResponse",
"__data__": {
"models": [
{
"model": "llama3.2:3b",
"name": "llama3.2:3b",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"expires_at": "2025-10-08T16:14:05.423042-07:00",
"size": 3367856128,
"size_vram": 3367856128,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-10-08T11:32:11.101611-07:00",
"size": 585846784,
"size_vram": 585846784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-10-08T11:30:00.392919-07:00",
"size": 2350966784,
"size_vram": 2350966784,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
}
]
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375180,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-aeeb49e5e51c42fa94562780165bd620",
"object": "model_permission",
"created": 1762375180,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375115,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-feec0a894be04f738e12b596ff163b64",
"object": "model_permission",
"created": 1762375115,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_stop_sequence[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:stop_sequence]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374330,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-119e17052e4c4c13bd791af3138d5360",
"object": "model_permission",
"created": 1762374330,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375226,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-c6ae673fda084519b3c67947896cd3b0",
"object": "model_permission",
"created": 1762375226,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374573,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-3f422354a81e491b87f93d5b192a0e1a",
"object": "model_permission",
"created": 1762374573,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374305,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-794e16e59ddb4216a8bedfdf485b8f24",
"object": "model_permission",
"created": 1762374305,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=vllm/Qwen/Qwen3-0.6B-True]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374317,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-ff7d26d076eb4373a0631a80fe3ae063",
"object": "model_permission",
"created": 1762374317,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762375033,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-2a16fede981b43be9e1cbe3dbedd1e74",
"object": "model_permission",
"created": 1762375033,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,45 @@
{
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
"request": {
"method": "POST",
"url": "http://localhost:8000/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "Qwen/Qwen3-0.6B",
"created": 1762374297,
"object": "model",
"owned_by": "vllm",
"root": "/root/.cache/Qwen3-0.6B",
"parent": null,
"max_model_len": 8192,
"permission": [
{
"id": "modelperm-4bc93704559a4e1d8492aeec7222040c",
"object": "model_permission",
"created": 1762374297,
"allow_create_engine": false,
"allow_sampling": true,
"allow_logprobs": true,
"allow_search_indices": false,
"allow_view": true,
"allow_fine_tuning": false,
"organization": "*",
"group": null,
"is_blocking": false
}
]
}
}
],
"is_streaming": false
},
"id_normalization_mapping": {}
}
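
The remaining new recordings are all the same `/v1/models` probe, fired once per test to confirm the vLLM server is up and serving the expected model. The recordings store the probe with `"method": "POST"`, while the stock client issues a GET; a sketch using the standard client call:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

for m in client.models.list().data:
    # Recorded identity: "Qwen/Qwen3-0.6B" owned by "vllm".
    print(m.id, m.owned_by)
```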

Some files were not shown because too many files have changed in this diff.