diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml
index ec4d7f977..d44cba4ee 100644
--- a/.github/actions/run-and-record-tests/action.yml
+++ b/.github/actions/run-and-record-tests/action.yml
@@ -72,7 +72,8 @@ runs:
           echo "New recordings detected, committing and pushing"
           git add tests/integration/
 
-          git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
+          git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
+
           git fetch origin ${{ github.ref_name }}
           git rebase origin/${{ github.ref_name }}
           echo "Rebased successfully"
@@ -88,6 +89,8 @@ runs:
       run: |
         # Ollama logs (if ollama container exists)
         sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
+        # vllm logs (if vllm container exists)
+        sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
         # Note: distro container logs are now dumped in integration-tests.sh before container is removed
 
     - name: Upload logs
diff --git a/.github/actions/setup-vllm/action.yml b/.github/actions/setup-vllm/action.yml
index 17ebd42f2..34ced0998 100644
--- a/.github/actions/setup-vllm/action.yml
+++ b/.github/actions/setup-vllm/action.yml
@@ -11,13 +11,14 @@ runs:
           --name vllm \
           -p 8000:8000 \
           --privileged=true \
-          quay.io/higginsd/vllm-cpu:65393ee064 \
+          quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
           --host 0.0.0.0 \
           --port 8000 \
           --enable-auto-tool-choice \
-          --tool-call-parser llama3_json \
-          --model /root/.cache/Llama-3.2-1B-Instruct \
-          --served-model-name meta-llama/Llama-3.2-1B-Instruct
+          --tool-call-parser hermes \
+          --model /root/.cache/Qwen3-0.6B \
+          --served-model-name Qwen/Qwen3-0.6B \
+          --max-model-len 8192
 
           # Wait for vllm to be ready
           echo "Waiting for vllm to be ready..."
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 41822fa79..2c797e906 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -27,7 +27,6 @@ on:
   schedule:
     # If changing the cron schedule, update the provider in the test-matrix job
     - cron: '0 0 * * *'  # (test latest client) Daily at 12 AM UTC
-    - cron: '1 0 * * 0'  # (test vllm) Weekly on Sunday at 1 AM UTC
   workflow_dispatch:
     inputs:
       test-all-client-versions:
diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
index 5d9917bfd..448b08908 100644
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@@ -9976,6 +9976,70 @@ components:
         - metadata
       title: VectorStoreObject
       description: OpenAI Vector Store object.
+    VectorStoreChunkingStrategy:
+      oneOf:
+        - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+        - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+      discriminator:
+        propertyName: type
+        mapping:
+          auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+          static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+    VectorStoreChunkingStrategyAuto:
+      type: object
+      properties:
+        type:
+          type: string
+          const: auto
+          default: auto
+          description: >-
+            Strategy type, always "auto" for automatic chunking
+      additionalProperties: false
+      required:
+        - type
+      title: VectorStoreChunkingStrategyAuto
+      description: >-
+        Automatic chunking strategy for vector store files.
+    VectorStoreChunkingStrategyStatic:
+      type: object
+      properties:
+        type:
+          type: string
+          const: static
+          default: static
+          description: >-
+            Strategy type, always "static" for static chunking
+        static:
+          $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
+          description: >-
+            Configuration parameters for the static chunking strategy
+      additionalProperties: false
+      required:
+        - type
+        - static
+      title: VectorStoreChunkingStrategyStatic
+      description: >-
+        Static chunking strategy with configurable parameters.
+    VectorStoreChunkingStrategyStaticConfig:
+      type: object
+      properties:
+        chunk_overlap_tokens:
+          type: integer
+          default: 400
+          description: >-
+            Number of tokens to overlap between adjacent chunks
+        max_chunk_size_tokens:
+          type: integer
+          default: 800
+          description: >-
+            Maximum number of tokens per chunk, must be between 100 and 4096
+      additionalProperties: false
+      required:
+        - chunk_overlap_tokens
+        - max_chunk_size_tokens
+      title: VectorStoreChunkingStrategyStaticConfig
+      description: >-
+        Configuration for static chunking strategy.
     "OpenAICreateVectorStoreRequestWithExtraBody":
       type: object
       properties:
@@ -10001,15 +10065,7 @@ components:
           description: >-
             (Optional) Expiration policy for the vector store
         chunking_strategy:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          $ref: '#/components/schemas/VectorStoreChunkingStrategy'
           description: >-
             (Optional) Strategy for splitting files into chunks
         metadata:
@@ -10085,70 +10141,6 @@ components:
         - deleted
       title: VectorStoreDeleteResponse
       description: Response from deleting a vector store.
-    VectorStoreChunkingStrategy:
-      oneOf:
-        - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
-        - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
-      discriminator:
-        propertyName: type
-        mapping:
-          auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
-          static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
-    VectorStoreChunkingStrategyAuto:
-      type: object
-      properties:
-        type:
-          type: string
-          const: auto
-          default: auto
-          description: >-
-            Strategy type, always "auto" for automatic chunking
-      additionalProperties: false
-      required:
-        - type
-      title: VectorStoreChunkingStrategyAuto
-      description: >-
-        Automatic chunking strategy for vector store files.
-    VectorStoreChunkingStrategyStatic:
-      type: object
-      properties:
-        type:
-          type: string
-          const: static
-          default: static
-          description: >-
-            Strategy type, always "static" for static chunking
-        static:
-          $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
-          description: >-
-            Configuration parameters for the static chunking strategy
-      additionalProperties: false
-      required:
-        - type
-        - static
-      title: VectorStoreChunkingStrategyStatic
-      description: >-
-        Static chunking strategy with configurable parameters.
-    VectorStoreChunkingStrategyStaticConfig:
-      type: object
-      properties:
-        chunk_overlap_tokens:
-          type: integer
-          default: 400
-          description: >-
-            Number of tokens to overlap between adjacent chunks
-        max_chunk_size_tokens:
-          type: integer
-          default: 800
-          description: >-
-            Maximum number of tokens per chunk, must be between 100 and 4096
-      additionalProperties: false
-      required:
-        - chunk_overlap_tokens
-        - max_chunk_size_tokens
-      title: VectorStoreChunkingStrategyStaticConfig
-      description: >-
-        Configuration for static chunking strategy.
     "OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
       type: object
       properties:
@@ -10606,7 +10598,9 @@ components:
           description: >-
             Object type identifier for the search results page
         search_query:
-          type: string
+          type: array
+          items:
+            type: string
           description: >-
             The original search query that was executed
         data:
diff --git a/docs/docs/distributions/self_hosted_distro/starter.md b/docs/docs/distributions/self_hosted_distro/starter.md
index f6786a95c..84c35f3d3 100644
--- a/docs/docs/distributions/self_hosted_distro/starter.md
+++ b/docs/docs/distributions/self_hosted_distro/starter.md
@@ -163,7 +163,41 @@ docker run \
   --port $LLAMA_STACK_PORT
 ```
 
-### Via venv
+The container will run the distribution with a SQLite store by default. This store is used for the following components:
+
+- Metadata store: store metadata about the models, providers, etc.
+- Inference store: collect of responses from the inference provider
+- Agents store: store agent configurations (sessions, turns, etc.)
+- Agents Responses store: store responses from the agents
+
+However, you can use PostgreSQL instead by running the `starter::run-with-postgres-store.yaml` configuration:
+
+```bash
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -e OPENAI_API_KEY=your_openai_key \
+  -e FIREWORKS_API_KEY=your_fireworks_key \
+  -e TOGETHER_API_KEY=your_together_key \
+  -e POSTGRES_HOST=your_postgres_host \
+  -e POSTGRES_PORT=your_postgres_port \
+  -e POSTGRES_DB=your_postgres_db \
+  -e POSTGRES_USER=your_postgres_user \
+  -e POSTGRES_PASSWORD=your_postgres_password \
+  llamastack/distribution-starter \
+  starter::run-with-postgres-store.yaml
+```
+
+Postgres environment variables:
+
+- `POSTGRES_HOST`: Postgres host (default: `localhost`)
+- `POSTGRES_PORT`: Postgres port (default: `5432`)
+- `POSTGRES_DB`: Postgres database name (default: `llamastack`)
+- `POSTGRES_USER`: Postgres username (default: `llamastack`)
+- `POSTGRES_PASSWORD`: Postgres password (default: `llamastack`)
+
+### Via Conda or venv
 
 Ensure you have configured the starter distribution using the environment variables explained above.
 
@@ -171,8 +205,11 @@ Ensure you have configured the starter distribution using the environment variab
 # Install dependencies for the starter distribution
 uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
 
-# Run the server
+# Run the server (with SQLite - default)
 uv run --with llama-stack llama stack run starter
+
+# Or run with PostgreSQL
+uv run --with llama-stack llama stack run starter::run-with-postgres-store.yaml
 ```
 
 ## Example Usage
diff --git a/docs/docs/providers/inference/remote_passthrough.mdx b/docs/docs/providers/inference/remote_passthrough.mdx
index 7a2931690..957cd04da 100644
--- a/docs/docs/providers/inference/remote_passthrough.mdx
+++ b/docs/docs/providers/inference/remote_passthrough.mdx
@@ -16,7 +16,7 @@ Passthrough inference provider for connecting to any external inference service
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | API Key for the passthrouth endpoint |
+| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
 | `url` | `<class 'str'>` | No |  | The URL for the passthrough endpoint |
 
 ## Sample Configuration
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index a705f499a..cc720ad18 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -9260,6 +9260,70 @@ components:
         - metadata
       title: VectorStoreObject
       description: OpenAI Vector Store object.
+    VectorStoreChunkingStrategy:
+      oneOf:
+        - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+        - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+      discriminator:
+        propertyName: type
+        mapping:
+          auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+          static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+    VectorStoreChunkingStrategyAuto:
+      type: object
+      properties:
+        type:
+          type: string
+          const: auto
+          default: auto
+          description: >-
+            Strategy type, always "auto" for automatic chunking
+      additionalProperties: false
+      required:
+        - type
+      title: VectorStoreChunkingStrategyAuto
+      description: >-
+        Automatic chunking strategy for vector store files.
+    VectorStoreChunkingStrategyStatic:
+      type: object
+      properties:
+        type:
+          type: string
+          const: static
+          default: static
+          description: >-
+            Strategy type, always "static" for static chunking
+        static:
+          $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
+          description: >-
+            Configuration parameters for the static chunking strategy
+      additionalProperties: false
+      required:
+        - type
+        - static
+      title: VectorStoreChunkingStrategyStatic
+      description: >-
+        Static chunking strategy with configurable parameters.
+    VectorStoreChunkingStrategyStaticConfig:
+      type: object
+      properties:
+        chunk_overlap_tokens:
+          type: integer
+          default: 400
+          description: >-
+            Number of tokens to overlap between adjacent chunks
+        max_chunk_size_tokens:
+          type: integer
+          default: 800
+          description: >-
+            Maximum number of tokens per chunk, must be between 100 and 4096
+      additionalProperties: false
+      required:
+        - chunk_overlap_tokens
+        - max_chunk_size_tokens
+      title: VectorStoreChunkingStrategyStaticConfig
+      description: >-
+        Configuration for static chunking strategy.
     "OpenAICreateVectorStoreRequestWithExtraBody":
       type: object
       properties:
@@ -9285,15 +9349,7 @@ components:
           description: >-
             (Optional) Expiration policy for the vector store
         chunking_strategy:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          $ref: '#/components/schemas/VectorStoreChunkingStrategy'
           description: >-
             (Optional) Strategy for splitting files into chunks
         metadata:
@@ -9369,70 +9425,6 @@ components:
         - deleted
       title: VectorStoreDeleteResponse
       description: Response from deleting a vector store.
-    VectorStoreChunkingStrategy:
-      oneOf:
-        - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
-        - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
-      discriminator:
-        propertyName: type
-        mapping:
-          auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
-          static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
-    VectorStoreChunkingStrategyAuto:
-      type: object
-      properties:
-        type:
-          type: string
-          const: auto
-          default: auto
-          description: >-
-            Strategy type, always "auto" for automatic chunking
-      additionalProperties: false
-      required:
-        - type
-      title: VectorStoreChunkingStrategyAuto
-      description: >-
-        Automatic chunking strategy for vector store files.
-    VectorStoreChunkingStrategyStatic:
-      type: object
-      properties:
-        type:
-          type: string
-          const: static
-          default: static
-          description: >-
-            Strategy type, always "static" for static chunking
-        static:
-          $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
-          description: >-
-            Configuration parameters for the static chunking strategy
-      additionalProperties: false
-      required:
-        - type
-        - static
-      title: VectorStoreChunkingStrategyStatic
-      description: >-
-        Static chunking strategy with configurable parameters.
-    VectorStoreChunkingStrategyStaticConfig:
-      type: object
-      properties:
-        chunk_overlap_tokens:
-          type: integer
-          default: 400
-          description: >-
-            Number of tokens to overlap between adjacent chunks
-        max_chunk_size_tokens:
-          type: integer
-          default: 800
-          description: >-
-            Maximum number of tokens per chunk, must be between 100 and 4096
-      additionalProperties: false
-      required:
-        - chunk_overlap_tokens
-        - max_chunk_size_tokens
-      title: VectorStoreChunkingStrategyStaticConfig
-      description: >-
-        Configuration for static chunking strategy.
     "OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
       type: object
       properties:
@@ -9890,7 +9882,9 @@ components:
           description: >-
             Object type identifier for the search results page
         search_query:
-          type: string
+          type: array
+          items:
+            type: string
           description: >-
             The original search query that was executed
         data:
diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml
index 5d9917bfd..448b08908 100644
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -9976,6 +9976,70 @@ components:
         - metadata
       title: VectorStoreObject
       description: OpenAI Vector Store object.
+    VectorStoreChunkingStrategy:
+      oneOf:
+        - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+        - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+      discriminator:
+        propertyName: type
+        mapping:
+          auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
+          static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
+    VectorStoreChunkingStrategyAuto:
+      type: object
+      properties:
+        type:
+          type: string
+          const: auto
+          default: auto
+          description: >-
+            Strategy type, always "auto" for automatic chunking
+      additionalProperties: false
+      required:
+        - type
+      title: VectorStoreChunkingStrategyAuto
+      description: >-
+        Automatic chunking strategy for vector store files.
+    VectorStoreChunkingStrategyStatic:
+      type: object
+      properties:
+        type:
+          type: string
+          const: static
+          default: static
+          description: >-
+            Strategy type, always "static" for static chunking
+        static:
+          $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
+          description: >-
+            Configuration parameters for the static chunking strategy
+      additionalProperties: false
+      required:
+        - type
+        - static
+      title: VectorStoreChunkingStrategyStatic
+      description: >-
+        Static chunking strategy with configurable parameters.
+    VectorStoreChunkingStrategyStaticConfig:
+      type: object
+      properties:
+        chunk_overlap_tokens:
+          type: integer
+          default: 400
+          description: >-
+            Number of tokens to overlap between adjacent chunks
+        max_chunk_size_tokens:
+          type: integer
+          default: 800
+          description: >-
+            Maximum number of tokens per chunk, must be between 100 and 4096
+      additionalProperties: false
+      required:
+        - chunk_overlap_tokens
+        - max_chunk_size_tokens
+      title: VectorStoreChunkingStrategyStaticConfig
+      description: >-
+        Configuration for static chunking strategy.
     "OpenAICreateVectorStoreRequestWithExtraBody":
       type: object
       properties:
@@ -10001,15 +10065,7 @@ components:
           description: >-
             (Optional) Expiration policy for the vector store
         chunking_strategy:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          $ref: '#/components/schemas/VectorStoreChunkingStrategy'
           description: >-
             (Optional) Strategy for splitting files into chunks
         metadata:
@@ -10085,70 +10141,6 @@ components:
         - deleted
       title: VectorStoreDeleteResponse
       description: Response from deleting a vector store.
-    VectorStoreChunkingStrategy:
-      oneOf:
-        - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
-        - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
-      discriminator:
-        propertyName: type
-        mapping:
-          auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
-          static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
-    VectorStoreChunkingStrategyAuto:
-      type: object
-      properties:
-        type:
-          type: string
-          const: auto
-          default: auto
-          description: >-
-            Strategy type, always "auto" for automatic chunking
-      additionalProperties: false
-      required:
-        - type
-      title: VectorStoreChunkingStrategyAuto
-      description: >-
-        Automatic chunking strategy for vector store files.
-    VectorStoreChunkingStrategyStatic:
-      type: object
-      properties:
-        type:
-          type: string
-          const: static
-          default: static
-          description: >-
-            Strategy type, always "static" for static chunking
-        static:
-          $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
-          description: >-
-            Configuration parameters for the static chunking strategy
-      additionalProperties: false
-      required:
-        - type
-        - static
-      title: VectorStoreChunkingStrategyStatic
-      description: >-
-        Static chunking strategy with configurable parameters.
-    VectorStoreChunkingStrategyStaticConfig:
-      type: object
-      properties:
-        chunk_overlap_tokens:
-          type: integer
-          default: 400
-          description: >-
-            Number of tokens to overlap between adjacent chunks
-        max_chunk_size_tokens:
-          type: integer
-          default: 800
-          description: >-
-            Maximum number of tokens per chunk, must be between 100 and 4096
-      additionalProperties: false
-      required:
-        - chunk_overlap_tokens
-        - max_chunk_size_tokens
-      title: VectorStoreChunkingStrategyStaticConfig
-      description: >-
-        Configuration for static chunking strategy.
     "OpenAICreateVectorStoreFileBatchRequestWithExtraBody":
       type: object
       properties:
@@ -10606,7 +10598,9 @@ components:
           description: >-
             Object type identifier for the search results page
         search_query:
-          type: string
+          type: array
+          items:
+            type: string
           description: >-
             The original search query that was executed
         data:
diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh
index 372e97d8c..2d088f3df 100755
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@@ -405,11 +405,6 @@ fi
 echo "=== Running Integration Tests ==="
 EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
 
-# Additional exclusions for vllm setup
-if [[ "$TEST_SETUP" == "vllm" ]]; then
-    EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
-fi
-
 PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
 if [[ -n "$TEST_PATTERN" ]]; then
     PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"
diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama_stack/apis/vector_io/vector_io.py
index f39212733..98f43f70d 100644
--- a/src/llama_stack/apis/vector_io/vector_io.py
+++ b/src/llama_stack/apis/vector_io/vector_io.py
@@ -260,7 +260,7 @@ class VectorStoreSearchResponsePage(BaseModel):
     """
 
     object: str = "vector_store.search_results.page"
-    search_query: str
+    search_query: list[str]
     data: list[VectorStoreSearchResponse]
     has_more: bool = False
     next_page: str | None = None
@@ -478,7 +478,7 @@ class OpenAICreateVectorStoreRequestWithExtraBody(BaseModel, extra="allow"):
     name: str | None = None
     file_ids: list[str] | None = None
     expires_after: dict[str, Any] | None = None
-    chunking_strategy: dict[str, Any] | None = None
+    chunking_strategy: VectorStoreChunkingStrategy | None = None
     metadata: dict[str, Any] | None = None
 
 
diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py
index 78b38ba95..b54217619 100644
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@@ -20,6 +20,8 @@ from llama_stack.apis.vector_io import (
     SearchRankingOptions,
     VectorIO,
     VectorStoreChunkingStrategy,
+    VectorStoreChunkingStrategyStatic,
+    VectorStoreChunkingStrategyStaticConfig,
     VectorStoreDeleteResponse,
     VectorStoreFileBatchObject,
     VectorStoreFileContentsResponse,
@@ -167,6 +169,13 @@ class VectorIORouter(VectorIO):
         if embedding_dimension is not None:
             params.model_extra["embedding_dimension"] = embedding_dimension
 
+        # Set chunking strategy explicitly if not provided
+        if params.chunking_strategy is None or params.chunking_strategy.type == "auto":
+            # actualize the chunking strategy to static
+            params.chunking_strategy = VectorStoreChunkingStrategyStatic(
+                static=VectorStoreChunkingStrategyStaticConfig()
+            )
+
         return await provider.openai_create_vector_store(params)
 
     async def openai_list_vector_stores(
@@ -283,6 +292,8 @@ class VectorIORouter(VectorIO):
         chunking_strategy: VectorStoreChunkingStrategy | None = None,
     ) -> VectorStoreFileObject:
         logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}")
+        if chunking_strategy is None or chunking_strategy.type == "auto":
+            chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
         provider = await self.routing_table.get_provider_impl(vector_store_id)
         return await provider.openai_attach_file_to_vector_store(
             vector_store_id=vector_store_id,
diff --git a/src/llama_stack/core/utils/config_resolution.py b/src/llama_stack/core/utils/config_resolution.py
index fcf057db6..2a85837b6 100644
--- a/src/llama_stack/core/utils/config_resolution.py
+++ b/src/llama_stack/core/utils/config_resolution.py
@@ -52,7 +52,17 @@ def resolve_config_or_distro(
             logger.debug(f"Using distribution: {distro_config}")
             return distro_config
 
-    # Strategy 3: Try as built distribution name
+    # Strategy 3: Try as distro config path (if no .yaml extension and contains a slash)
+    # eg: starter::run-with-postgres-store.yaml
+    # Use :: to avoid slash and confusion with a filesystem path
+    if "::" in config_or_distro:
+        distro_name, config_name = config_or_distro.split("::")
+        distro_config = _get_distro_config_path(distro_name, config_name)
+        if distro_config.exists():
+            logger.info(f"Using distribution: {distro_config}")
+            return distro_config
+
+    # Strategy 4: Try as built distribution name
     distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
     if distrib_config.exists():
         logger.debug(f"Using built distribution: {distrib_config}")
@@ -63,13 +73,15 @@ def resolve_config_or_distro(
         logger.debug(f"Using built distribution: {distrib_config}")
         return distrib_config
 
-    # Strategy 4: Failed - provide helpful error
+    # Strategy 5: Failed - provide helpful error
     raise ValueError(_format_resolution_error(config_or_distro, mode))
 
 
-def _get_distro_config_path(distro_name: str, mode: Mode) -> Path:
+def _get_distro_config_path(distro_name: str, mode: str) -> Path:
     """Get the config file path for a distro."""
-    return DISTRO_DIR / distro_name / f"{mode}.yaml"
+    if not mode.endswith(".yaml"):
+        mode = f"{mode}.yaml"
+    return DISTRO_DIR / distro_name / mode
 
 
 def _format_resolution_error(config_or_distro: str, mode: Mode) -> str:
diff --git a/src/llama_stack/core/utils/exec.py b/src/llama_stack/core/utils/exec.py
index 12fb82d01..98964db2c 100644
--- a/src/llama_stack/core/utils/exec.py
+++ b/src/llama_stack/core/utils/exec.py
@@ -84,6 +84,15 @@ def run_command(command: list[str]) -> int:
             text=True,
             check=False,
         )
+
+        # Print stdout and stderr if command failed
+        if result.returncode != 0:
+            log.error(f"Command {' '.join(command)} failed with returncode {result.returncode}")
+            if result.stdout:
+                log.error(f"STDOUT: {result.stdout}")
+            if result.stderr:
+                log.error(f"STDERR: {result.stderr}")
+
         return result.returncode
     except subprocess.SubprocessError as e:
         log.error(f"Subprocess error: {e}")
diff --git a/src/llama_stack/distributions/ci-tests/build.yaml b/src/llama_stack/distributions/ci-tests/build.yaml
index c01e415a9..f29ac7712 100644
--- a/src/llama_stack/distributions/ci-tests/build.yaml
+++ b/src/llama_stack/distributions/ci-tests/build.yaml
@@ -56,4 +56,5 @@ image_type: venv
 additional_pip_packages:
 - aiosqlite
 - asyncpg
+- psycopg2-binary
 - sqlalchemy[asyncio]
diff --git a/src/llama_stack/distributions/ci-tests/ci_tests.py b/src/llama_stack/distributions/ci-tests/ci_tests.py
index ab102f5f3..c06b1b98d 100644
--- a/src/llama_stack/distributions/ci-tests/ci_tests.py
+++ b/src/llama_stack/distributions/ci-tests/ci_tests.py
@@ -13,5 +13,6 @@ from ..starter.starter import get_distribution_template as get_starter_distribut
 def get_distribution_template() -> DistributionTemplate:
     template = get_starter_distribution_template(name="ci-tests")
     template.description = "CI tests for Llama Stack"
+    template.run_configs.pop("run-with-postgres-store.yaml", None)
 
     return template
diff --git a/src/llama_stack/distributions/postgres-demo/__init__.py b/src/llama_stack/distributions/postgres-demo/__init__.py
deleted file mode 100644
index 81473cb73..000000000
--- a/src/llama_stack/distributions/postgres-demo/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .postgres_demo import get_distribution_template  # noqa: F401
diff --git a/src/llama_stack/distributions/postgres-demo/build.yaml b/src/llama_stack/distributions/postgres-demo/build.yaml
deleted file mode 100644
index 063dc3999..000000000
--- a/src/llama_stack/distributions/postgres-demo/build.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-version: 2
-distribution_spec:
-  description: Quick start template for running Llama Stack with several popular providers
-  providers:
-    inference:
-    - provider_type: remote::vllm
-    - provider_type: inline::sentence-transformers
-    vector_io:
-    - provider_type: remote::chromadb
-    safety:
-    - provider_type: inline::llama-guard
-    agents:
-    - provider_type: inline::meta-reference
-    tool_runtime:
-    - provider_type: remote::brave-search
-    - provider_type: remote::tavily-search
-    - provider_type: inline::rag-runtime
-    - provider_type: remote::model-context-protocol
-image_type: venv
-additional_pip_packages:
-- asyncpg
-- psycopg2-binary
-- sqlalchemy[asyncio]
diff --git a/src/llama_stack/distributions/postgres-demo/postgres_demo.py b/src/llama_stack/distributions/postgres-demo/postgres_demo.py
deleted file mode 100644
index 876370ef3..000000000
--- a/src/llama_stack/distributions/postgres-demo/postgres_demo.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-
-from llama_stack.apis.models import ModelType
-from llama_stack.core.datatypes import (
-    BuildProvider,
-    ModelInput,
-    Provider,
-    ShieldInput,
-    ToolGroupInput,
-)
-from llama_stack.distributions.template import (
-    DistributionTemplate,
-    RunConfigSettings,
-)
-from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
-from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
-from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
-from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
-from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
-
-
-def get_distribution_template() -> DistributionTemplate:
-    inference_providers = [
-        Provider(
-            provider_id="vllm-inference",
-            provider_type="remote::vllm",
-            config=VLLMInferenceAdapterConfig.sample_run_config(
-                url="${env.VLLM_URL:=http://localhost:8000/v1}",
-            ),
-        ),
-    ]
-    providers = {
-        "inference": [
-            BuildProvider(provider_type="remote::vllm"),
-            BuildProvider(provider_type="inline::sentence-transformers"),
-        ],
-        "vector_io": [BuildProvider(provider_type="remote::chromadb")],
-        "safety": [BuildProvider(provider_type="inline::llama-guard")],
-        "agents": [BuildProvider(provider_type="inline::meta-reference")],
-        "tool_runtime": [
-            BuildProvider(provider_type="remote::brave-search"),
-            BuildProvider(provider_type="remote::tavily-search"),
-            BuildProvider(provider_type="inline::rag-runtime"),
-            BuildProvider(provider_type="remote::model-context-protocol"),
-        ],
-    }
-    name = "postgres-demo"
-
-    vector_io_providers = [
-        Provider(
-            provider_id="${env.ENABLE_CHROMADB:+chromadb}",
-            provider_type="remote::chromadb",
-            config=ChromaVectorIOConfig.sample_run_config(
-                f"~/.llama/distributions/{name}",
-                url="${env.CHROMADB_URL:=}",
-            ),
-        ),
-    ]
-    default_tool_groups = [
-        ToolGroupInput(
-            toolgroup_id="builtin::websearch",
-            provider_id="tavily-search",
-        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::rag",
-            provider_id="rag-runtime",
-        ),
-    ]
-
-    default_models = [
-        ModelInput(
-            model_id="${env.INFERENCE_MODEL}",
-            provider_id="vllm-inference",
-        )
-    ]
-    embedding_provider = Provider(
-        provider_id="sentence-transformers",
-        provider_type="inline::sentence-transformers",
-        config=SentenceTransformersInferenceConfig.sample_run_config(),
-    )
-    embedding_model = ModelInput(
-        model_id="nomic-embed-text-v1.5",
-        provider_id=embedding_provider.provider_id,
-        model_type=ModelType.embedding,
-        metadata={
-            "embedding_dimension": 768,
-        },
-    )
-    return DistributionTemplate(
-        name=name,
-        distro_type="self_hosted",
-        description="Quick start template for running Llama Stack with several popular providers",
-        container_image=None,
-        template_path=None,
-        providers=providers,
-        available_models_by_provider={},
-        run_configs={
-            "run.yaml": RunConfigSettings(
-                provider_overrides={
-                    "inference": inference_providers + [embedding_provider],
-                    "vector_io": vector_io_providers,
-                },
-                default_models=default_models + [embedding_model],
-                default_tool_groups=default_tool_groups,
-                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
-                storage_backends={
-                    "kv_default": PostgresKVStoreConfig.sample_run_config(
-                        table_name="llamastack_kvstore",
-                    ),
-                    "sql_default": PostgresSqlStoreConfig.sample_run_config(),
-                },
-            ),
-        },
-        run_config_env_vars={
-            "LLAMA_STACK_PORT": (
-                "8321",
-                "Port for the Llama Stack distribution server",
-            ),
-        },
-    )
diff --git a/src/llama_stack/distributions/starter-gpu/build.yaml b/src/llama_stack/distributions/starter-gpu/build.yaml
index b2e2a0c85..10cbb1389 100644
--- a/src/llama_stack/distributions/starter-gpu/build.yaml
+++ b/src/llama_stack/distributions/starter-gpu/build.yaml
@@ -57,4 +57,5 @@ image_type: venv
 additional_pip_packages:
 - aiosqlite
 - asyncpg
+- psycopg2-binary
 - sqlalchemy[asyncio]
diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
new file mode 100644
index 000000000..6dbbc8716
--- /dev/null
+++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
@@ -0,0 +1,281 @@
+version: 2
+image_name: starter-gpu
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sql_postgres
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      responses_store:
+        type: sql_postgres
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
+  post_training:
+  - provider_id: huggingface-gpu
+    provider_type: inline::huggingface-gpu
+    config:
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
+      dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches
+        backend: kv_postgres
+storage:
+  backends:
+    kv_postgres:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_postgres:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_postgres
+    inference:
+      table_name: inference_store
+      backend: sql_postgres
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_postgres
+    prompts:
+      namespace: prompts
+      backend: kv_postgres
+registered_resources:
+  models: []
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups: []
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/starter/build.yaml b/src/llama_stack/distributions/starter/build.yaml
index baa80ef3e..acd51f773 100644
--- a/src/llama_stack/distributions/starter/build.yaml
+++ b/src/llama_stack/distributions/starter/build.yaml
@@ -57,4 +57,5 @@ image_type: venv
 additional_pip_packages:
 - aiosqlite
 - asyncpg
+- psycopg2-binary
 - sqlalchemy[asyncio]
diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
new file mode 100644
index 000000000..530084bd9
--- /dev/null
+++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
@@ -0,0 +1,278 @@
+version: 2
+image_name: starter
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sql_postgres
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      responses_store:
+        type: sql_postgres
+        host: ${env.POSTGRES_HOST:=localhost}
+        port: ${env.POSTGRES_PORT:=5432}
+        db: ${env.POSTGRES_DB:=llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
+        password: ${env.POSTGRES_PASSWORD:=llamastack}
+  post_training:
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
+    config:
+      checkpoint_format: meta
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches
+        backend: kv_postgres
+storage:
+  backends:
+    kv_postgres:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_postgres:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_postgres
+    inference:
+      table_name: inference_store
+      backend: sql_postgres
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_postgres
+    prompts:
+      namespace: prompts
+      backend: kv_postgres
+registered_resources:
+  models: []
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups: []
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py
index 49b7a2463..88cd3a4fe 100644
--- a/src/llama_stack/distributions/starter/starter.py
+++ b/src/llama_stack/distributions/starter/starter.py
@@ -17,6 +17,11 @@ from llama_stack.core.datatypes import (
     ToolGroupInput,
     VectorStoresConfig,
 )
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    SqlStoreReference,
+)
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
 from llama_stack.providers.datatypes import RemoteProviderSpec
@@ -36,6 +41,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
 )
 from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
 from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
+from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
 
 
@@ -181,6 +187,62 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
             provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
         ),
     ]
+    postgres_config = PostgresSqlStoreConfig.sample_run_config()
+    default_overrides = {
+        "inference": remote_inference_providers + [embedding_provider],
+        "vector_io": [
+            Provider(
+                provider_id="faiss",
+                provider_type="inline::faiss",
+                config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+            ),
+            Provider(
+                provider_id="sqlite-vec",
+                provider_type="inline::sqlite-vec",
+                config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+            ),
+            Provider(
+                provider_id="${env.MILVUS_URL:+milvus}",
+                provider_type="inline::milvus",
+                config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+            ),
+            Provider(
+                provider_id="${env.CHROMADB_URL:+chromadb}",
+                provider_type="remote::chromadb",
+                config=ChromaVectorIOConfig.sample_run_config(
+                    f"~/.llama/distributions/{name}/",
+                    url="${env.CHROMADB_URL:=}",
+                ),
+            ),
+            Provider(
+                provider_id="${env.PGVECTOR_DB:+pgvector}",
+                provider_type="remote::pgvector",
+                config=PGVectorVectorIOConfig.sample_run_config(
+                    f"~/.llama/distributions/{name}",
+                    db="${env.PGVECTOR_DB:=}",
+                    user="${env.PGVECTOR_USER:=}",
+                    password="${env.PGVECTOR_PASSWORD:=}",
+                ),
+            ),
+            Provider(
+                provider_id="${env.QDRANT_URL:+qdrant}",
+                provider_type="remote::qdrant",
+                config=QdrantVectorIOConfig.sample_run_config(
+                    f"~/.llama/distributions/{name}",
+                    url="${env.QDRANT_URL:=}",
+                ),
+            ),
+            Provider(
+                provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
+                provider_type="remote::weaviate",
+                config=WeaviateVectorIOConfig.sample_run_config(
+                    f"~/.llama/distributions/{name}",
+                    cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
+                ),
+            ),
+        ],
+        "files": [files_provider],
+    }
 
     return DistributionTemplate(
         name=name,
@@ -189,64 +251,10 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
+        additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())),
         run_configs={
             "run.yaml": RunConfigSettings(
-                provider_overrides={
-                    "inference": remote_inference_providers + [embedding_provider],
-                    "vector_io": [
-                        Provider(
-                            provider_id="faiss",
-                            provider_type="inline::faiss",
-                            config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
-                        ),
-                        Provider(
-                            provider_id="sqlite-vec",
-                            provider_type="inline::sqlite-vec",
-                            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
-                        ),
-                        Provider(
-                            provider_id="${env.MILVUS_URL:+milvus}",
-                            provider_type="inline::milvus",
-                            config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
-                        ),
-                        Provider(
-                            provider_id="${env.CHROMADB_URL:+chromadb}",
-                            provider_type="remote::chromadb",
-                            config=ChromaVectorIOConfig.sample_run_config(
-                                f"~/.llama/distributions/{name}/",
-                                url="${env.CHROMADB_URL:=}",
-                            ),
-                        ),
-                        Provider(
-                            provider_id="${env.PGVECTOR_DB:+pgvector}",
-                            provider_type="remote::pgvector",
-                            config=PGVectorVectorIOConfig.sample_run_config(
-                                f"~/.llama/distributions/{name}",
-                                db="${env.PGVECTOR_DB:=}",
-                                user="${env.PGVECTOR_USER:=}",
-                                password="${env.PGVECTOR_PASSWORD:=}",
-                            ),
-                        ),
-                        Provider(
-                            provider_id="${env.QDRANT_URL:+qdrant}",
-                            provider_type="remote::qdrant",
-                            config=QdrantVectorIOConfig.sample_run_config(
-                                f"~/.llama/distributions/{name}",
-                                url="${env.QDRANT_URL:=}",
-                            ),
-                        ),
-                        Provider(
-                            provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
-                            provider_type="remote::weaviate",
-                            config=WeaviateVectorIOConfig.sample_run_config(
-                                f"~/.llama/distributions/{name}",
-                                cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
-                            ),
-                        ),
-                    ],
-                    "files": [files_provider],
-                },
+                provider_overrides=default_overrides,
                 default_models=[],
                 default_tool_groups=default_tool_groups,
                 default_shields=default_shields,
@@ -261,6 +269,55 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
                     default_shield_id="llama-guard",
                 ),
             ),
+            "run-with-postgres-store.yaml": RunConfigSettings(
+                provider_overrides={
+                    **default_overrides,
+                    "agents": [
+                        Provider(
+                            provider_id="meta-reference",
+                            provider_type="inline::meta-reference",
+                            config=dict(
+                                persistence_store=postgres_config,
+                                responses_store=postgres_config,
+                            ),
+                        )
+                    ],
+                    "batches": [
+                        Provider(
+                            provider_id="reference",
+                            provider_type="inline::reference",
+                            config=dict(
+                                kvstore=KVStoreReference(
+                                    backend="kv_postgres",
+                                    namespace="batches",
+                                ).model_dump(exclude_none=True),
+                            ),
+                        )
+                    ],
+                },
+                storage_backends={
+                    "kv_postgres": PostgresKVStoreConfig.sample_run_config(),
+                    "sql_postgres": postgres_config,
+                },
+                storage_stores={
+                    "metadata": KVStoreReference(
+                        backend="kv_postgres",
+                        namespace="registry",
+                    ).model_dump(exclude_none=True),
+                    "inference": InferenceStoreReference(
+                        backend="sql_postgres",
+                        table_name="inference_store",
+                    ).model_dump(exclude_none=True),
+                    "conversations": SqlStoreReference(
+                        backend="sql_postgres",
+                        table_name="openai_conversations",
+                    ).model_dump(exclude_none=True),
+                    "prompts": KVStoreReference(
+                        backend="kv_postgres",
+                        namespace="prompts",
+                    ).model_dump(exclude_none=True),
+                },
+            ),
         },
         run_config_env_vars={
             "LLAMA_STACK_PORT": (
diff --git a/src/llama_stack/providers/remote/inference/passthrough/__init__.py b/src/llama_stack/providers/remote/inference/passthrough/__init__.py
index 69dd4c461..1cc46bff1 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/__init__.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/__init__.py
@@ -10,8 +10,8 @@ from .config import PassthroughImplConfig
 
 
 class PassthroughProviderDataValidator(BaseModel):
-    url: str
-    api_key: str
+    passthrough_url: str
+    passthrough_api_key: str
 
 
 async def get_adapter_impl(config: PassthroughImplConfig, _deps):
diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py
index f8e8b8ce5..eca28a86a 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/config.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/config.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from pydantic import Field, SecretStr
+from pydantic import Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack.schema_utils import json_schema_type
@@ -19,11 +19,6 @@ class PassthroughImplConfig(RemoteInferenceProviderConfig):
         description="The URL for the passthrough endpoint",
     )
 
-    api_key: SecretStr | None = Field(
-        default=None,
-        description="API Key for the passthrouth endpoint",
-    )
-
     @classmethod
     def sample_run_config(
         cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 4d4d4f41d..3c56acfbd 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -5,9 +5,8 @@
 # the root directory of this source tree.
 
 from collections.abc import AsyncIterator
-from typing import Any
 
-from llama_stack_client import AsyncLlamaStackClient
+from openai import AsyncOpenAI
 
 from llama_stack.apis.inference import (
     Inference,
@@ -20,103 +19,117 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.apis.models import Model
-from llama_stack.core.library_client import convert_pydantic_to_json_value
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
+from llama_stack.core.request_headers import NeedsRequestProviderData
 
 from .config import PassthroughImplConfig
 
 
-class PassthroughInferenceAdapter(Inference):
+class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference):
     def __init__(self, config: PassthroughImplConfig) -> None:
-        ModelRegistryHelper.__init__(self)
         self.config = config
 
+    async def initialize(self) -> None:
+        pass
+
+    async def shutdown(self) -> None:
+        pass
+
     async def unregister_model(self, model_id: str) -> None:
         pass
 
     async def register_model(self, model: Model) -> Model:
         return model
 
-    def _get_client(self) -> AsyncLlamaStackClient:
-        passthrough_url = None
-        passthrough_api_key = None
-        provider_data = None
+    async def list_models(self) -> list[Model]:
+        """List models by calling the downstream /v1/models endpoint."""
+        client = self._get_openai_client()
 
-        if self.config.url is not None:
-            passthrough_url = self.config.url
-        else:
-            provider_data = self.get_request_provider_data()
-            if provider_data is None or not provider_data.passthrough_url:
-                raise ValueError(
-                    'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
-                )
-            passthrough_url = provider_data.passthrough_url
+        response = await client.models.list()
 
-        if self.config.api_key is not None:
-            passthrough_api_key = self.config.api_key.get_secret_value()
-        else:
-            provider_data = self.get_request_provider_data()
-            if provider_data is None or not provider_data.passthrough_api_key:
-                raise ValueError(
-                    'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
-                )
-            passthrough_api_key = provider_data.passthrough_api_key
+        # Convert from OpenAI format to Llama Stack Model format
+        models = []
+        for model_data in response.data:
+            downstream_model_id = model_data.id
+            custom_metadata = getattr(model_data, "custom_metadata", {}) or {}
 
-        return AsyncLlamaStackClient(
-            base_url=passthrough_url,
-            api_key=passthrough_api_key,
-            provider_data=provider_data,
+            # Prefix identifier with provider ID for local registry
+            local_identifier = f"{self.__provider_id__}/{downstream_model_id}"
+
+            model = Model(
+                identifier=local_identifier,
+                provider_id=self.__provider_id__,
+                provider_resource_id=downstream_model_id,
+                model_type=custom_metadata.get("model_type", "llm"),
+                metadata=custom_metadata,
+            )
+            models.append(model)
+
+        return models
+
+    async def should_refresh_models(self) -> bool:
+        """Passthrough should refresh models since they come from downstream dynamically."""
+        return self.config.refresh_models
+
+    def _get_openai_client(self) -> AsyncOpenAI:
+        """Get an AsyncOpenAI client configured for the downstream server."""
+        base_url = self._get_passthrough_url()
+        api_key = self._get_passthrough_api_key()
+
+        return AsyncOpenAI(
+            base_url=f"{base_url.rstrip('/')}/v1",
+            api_key=api_key,
         )
 
-    async def openai_embeddings(
-        self,
-        params: OpenAIEmbeddingsRequestWithExtraBody,
-    ) -> OpenAIEmbeddingsResponse:
-        raise NotImplementedError()
+    def _get_passthrough_url(self) -> str:
+        """Get the passthrough URL from config or provider data."""
+        if self.config.url is not None:
+            return self.config.url
+
+        provider_data = self.get_request_provider_data()
+        if provider_data is None:
+            raise ValueError(
+                'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
+            )
+        return provider_data.passthrough_url
+
+    def _get_passthrough_api_key(self) -> str:
+        """Get the passthrough API key from config or provider data."""
+        if self.config.auth_credential is not None:
+            return self.config.auth_credential.get_secret_value()
+
+        provider_data = self.get_request_provider_data()
+        if provider_data is None:
+            raise ValueError(
+                'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
+            )
+        return provider_data.passthrough_api_key
 
     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
     ) -> OpenAICompletion:
-        client = self._get_client()
-        model_obj = await self.model_store.get_model(params.model)
-
-        params = params.model_copy()
-        params.model = model_obj.provider_resource_id
-
+        """Forward completion request to downstream using OpenAI client."""
+        client = self._get_openai_client()
         request_params = params.model_dump(exclude_none=True)
-
-        return await client.inference.openai_completion(**request_params)
+        response = await client.completions.create(**request_params)
+        return response  # type: ignore
 
     async def openai_chat_completion(
         self,
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        client = self._get_client()
-        model_obj = await self.model_store.get_model(params.model)
-
-        params = params.model_copy()
-        params.model = model_obj.provider_resource_id
-
+        """Forward chat completion request to downstream using OpenAI client."""
+        client = self._get_openai_client()
         request_params = params.model_dump(exclude_none=True)
+        response = await client.chat.completions.create(**request_params)
+        return response  # type: ignore
 
-        return await client.inference.openai_chat_completion(**request_params)
-
-    def cast_value_to_json_dict(self, request_params: dict[str, Any]) -> dict[str, Any]:
-        json_params = {}
-        for key, value in request_params.items():
-            json_input = convert_pydantic_to_json_value(value)
-            if isinstance(json_input, dict):
-                json_input = {k: v for k, v in json_input.items() if v is not None}
-            elif isinstance(json_input, list):
-                json_input = [x for x in json_input if x is not None]
-                new_input = []
-                for x in json_input:
-                    if isinstance(x, dict):
-                        x = {k: v for k, v in x.items() if v is not None}
-                    new_input.append(x)
-                json_input = new_input
-
-            json_params[key] = json_input
-
-        return json_params
+    async def openai_embeddings(
+        self,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
+    ) -> OpenAIEmbeddingsResponse:
+        """Forward embeddings request to downstream using OpenAI client."""
+        client = self._get_openai_client()
+        request_params = params.model_dump(exclude_none=True)
+        response = await client.embeddings.create(**request_params)
+        return response  # type: ignore
diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index 41d4cb2d7..d047d9d12 100644
--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -26,6 +26,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreChunkingStrategy,
     VectorStoreChunkingStrategyAuto,
     VectorStoreChunkingStrategyStatic,
+    VectorStoreChunkingStrategyStaticConfig,
     VectorStoreContent,
     VectorStoreDeleteResponse,
     VectorStoreFileBatchObject,
@@ -414,6 +415,10 @@ class OpenAIVectorStoreMixin(ABC):
             in_progress=0,
             total=0,
         )
+        if not params.chunking_strategy or params.chunking_strategy.type == "auto":
+            chunking_strategy = VectorStoreChunkingStrategyStatic(static=VectorStoreChunkingStrategyStaticConfig())
+        else:
+            chunking_strategy = params.chunking_strategy
         store_info: dict[str, Any] = {
             "id": vector_store_id,
             "object": "vector_store",
@@ -426,7 +431,7 @@ class OpenAIVectorStoreMixin(ABC):
             "expires_at": None,
             "last_active_at": created_at,
             "file_ids": [],
-            "chunking_strategy": params.chunking_strategy,
+            "chunking_strategy": chunking_strategy.model_dump(),
         }
 
         # Add provider information to metadata if provided
@@ -637,7 +642,7 @@ class OpenAIVectorStoreMixin(ABC):
                     break
 
             return VectorStoreSearchResponsePage(
-                search_query=search_query,
+                search_query=query if isinstance(query, list) else [query],
                 data=data,
                 has_more=False,  # For simplicity, we don't implement pagination here
                 next_page=None,
@@ -647,7 +652,7 @@ class OpenAIVectorStoreMixin(ABC):
             logger.error(f"Error searching vector store {vector_store_id}: {e}")
             # Return empty results on error
             return VectorStoreSearchResponsePage(
-                search_query=search_query,
+                search_query=query if isinstance(query, list) else [query],
                 data=[],
                 has_more=False,
                 next_page=None,
@@ -886,8 +891,8 @@ class OpenAIVectorStoreMixin(ABC):
 
         # Determine pagination info
         has_more = len(file_objects) > limit
-        first_id = file_objects[0].id if file_objects else None
-        last_id = file_objects[-1].id if file_objects else None
+        first_id = limited_files[0].id if file_objects else None
+        last_id = limited_files[-1].id if file_objects else None
 
         return VectorStoreListFilesResponse(
             data=limited_files,
diff --git a/tests/integration/ci_matrix.json b/tests/integration/ci_matrix.json
index 314070eab..858176dff 100644
--- a/tests/integration/ci_matrix.json
+++ b/tests/integration/ci_matrix.json
@@ -2,7 +2,8 @@
   "default": [
     {"suite": "base", "setup": "ollama"},
     {"suite": "vision", "setup": "ollama-vision"},
-    {"suite": "responses", "setup": "gpt"}
+    {"suite": "responses", "setup": "gpt"},
+    {"suite": "base-vllm-subset", "setup": "vllm"}
   ],
   "schedules": {
     "1 0 * * 0": [
diff --git a/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-fb68f5a6.json b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-fb68f5a6.json
new file mode 100644
index 000000000..00e0862e8
--- /dev/null
+++ b/tests/integration/common/recordings/models-64a2277c90f0f42576f60c1030e3a020403d34a95f56931b792d5939f4cebc57-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": null,
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374291,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-f70298e4ea3e4b4eb7f2cc2deb7a2b01",
+              "object": "model_permission",
+              "created": 1762374291,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/0248ff8a1be5ff5ba88046947059ffbde15a1c52adbeea456bb42abdfc931bd1.json b/tests/integration/inference/recordings/0248ff8a1be5ff5ba88046947059ffbde15a1c52adbeea456bb42abdfc931bd1.json
new file mode 100644
index 000000000..605baf12e
--- /dev/null
+++ b/tests/integration/inference/recordings/0248ff8a1be5ff5ba88046947059ffbde15a1c52adbeea456bb42abdfc931bd1.json
@@ -0,0 +1,3010 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tools_and_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "system",
+          "content": "Pretend you are a weather assistant."
+        },
+        {
+          "role": "user",
+          "content": "What's the weather like in San Francisco, CA?"
+        }
+      ],
+      "max_tokens": 4096,
+      "stream": true,
+      "tool_choice": "auto",
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "get_weather",
+            "description": "Get the current weather",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "The city and state (both required), e.g. San Francisco, CA."
+                }
+              },
+              "required": [
+                "location"
+              ]
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "<think>",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "\n",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "Okay",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": ",",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " user",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " is",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " asking",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " about",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " weather",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " in",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " San",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " Francisco",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": ",",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " CA",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": ".",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " I",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " need",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " to",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " use",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " get",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "_weather",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " function",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " here",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": ".",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " The",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " function",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " requires",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " location",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " parameter",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": ",",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " which",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " in",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " this",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " case",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " is",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " \"",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "San",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " Francisco",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": ",",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " CA",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "\".",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " I",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " should",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " make",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " sure",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " to",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " include",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " both",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " city",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " and",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " state",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " as",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " specified",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": ".",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " Let",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " me",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " check",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " if",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " there",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "'s",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " any",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " other",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " information",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " needed",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": ",",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " but",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " user",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " just",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " wants",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " current",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " weather",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": ".",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " So",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": ",",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " tool",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " call",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " should",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " be",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " straightforward",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": ".",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " I",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "'ll",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " format",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " JSON",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " correctly",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " within",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " tool",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "_call",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": " tags",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": ".\n",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "</think>",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "\n\n",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": "chatcmpl-tool-33d90102b2fe4386808056bc3fa9ad17",
+                    "function": {
+                      "arguments": null,
+                      "name": "get_weather"
+                    },
+                    "type": "function"
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "{\"location\": \"",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "San",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": " Francisco",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": ",",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": " CA",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "\"}",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-0248ff8a1be5",
+          "choices": [
+            {
+              "delta": {
+                "content": "",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": "tool_calls",
+              "index": 0,
+              "logprobs": null,
+              "stop_reason": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      }
+    ],
+    "is_streaming": true
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/452805c3c85951c86e4e5dfeef078a2e184866dafee83186cd84932daae1af42.json b/tests/integration/inference/recordings/452805c3c85951c86e4e5dfeef078a2e184866dafee83186cd84932daae1af42.json
new file mode 100644
index 000000000..bbb81ab62
--- /dev/null
+++ b/tests/integration/inference/recordings/452805c3c85951c86e4e5dfeef078a2e184866dafee83186cd84932daae1af42.json
@@ -0,0 +1,84 @@
+{
+  "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestEdgeCases::test_tool_without_schema[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Call the no args tool"
+        }
+      ],
+      "max_tokens": 4096,
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "no_args_tool",
+            "description": "Tool with no arguments",
+            "parameters": {
+              "type": "object",
+              "properties": {}
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-452805c3c859",
+        "choices": [
+          {
+            "finish_reason": "tool_calls",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "<think>\nOkay, the user wants me to call the no args tool. Let me check the available functions. There's only one tool provided, which is the no_args_tool with no arguments. Since the user didn't specify any parameters, I should just return the tool call as instructed. I need to make sure the JSON is correctly formatted and within the XML tags. Alright, that's all I need.\n</think>\n\n",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [
+                {
+                  "id": "chatcmpl-tool-7a67269afe214c85924c5171612bbdbd",
+                  "function": {
+                    "arguments": "{}",
+                    "name": "no_args_tool"
+                  },
+                  "type": "function"
+                }
+              ],
+              "reasoning_content": null
+            },
+            "stop_reason": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 101,
+          "prompt_tokens": 136,
+          "total_tokens": 237,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "prompt_logprobs": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/496035259763c1bddb1a3148c2586663d08a5bc31f697d1fc5d9bed1c71f5950.json b/tests/integration/inference/recordings/496035259763c1bddb1a3148c2586663d08a5bc31f697d1fc5d9bed1c71f5950.json
new file mode 100644
index 000000000..04c59b0b7
--- /dev/null
+++ b/tests/integration/inference/recordings/496035259763c1bddb1a3148c2586663d08a5bc31f697d1fc5d9bed1c71f5950.json
@@ -0,0 +1,92 @@
+{
+  "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestOpenAICompatibility::test_openai_chat_completion_with_tools[openai_client-txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What's the weather in Tokyo?"
+        }
+      ],
+      "max_tokens": 4096,
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "get_weather",
+            "description": "Get weather information",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "City name"
+                }
+              },
+              "required": [
+                "location"
+              ]
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-496035259763",
+        "choices": [
+          {
+            "finish_reason": "tool_calls",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "<think>\nOkay, the user is asking about the weather in Tokyo. I need to use the get_weather function for that. The function requires the location parameter, which in this case is Tokyo. I should make sure to specify \"Tokyo\" as the location. Let me check if there are any other parameters needed, but no, the function only needs the location. So the tool call should be straightforward. I'll format the JSON correctly inside the tool_call tags.\n</think>\n\n",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [
+                {
+                  "id": "chatcmpl-tool-959b557fa67e4134a2391f5d35e5d5ae",
+                  "function": {
+                    "arguments": "{\"location\": \"Tokyo\"}",
+                    "name": "get_weather"
+                  },
+                  "type": "function"
+                }
+              ],
+              "reasoning_content": null
+            },
+            "stop_reason": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 117,
+          "prompt_tokens": 158,
+          "total_tokens": 275,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "prompt_logprobs": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/524ead18daaddb6228284820adaa3fb312d2a525cc35e20c181190ddf40793e6.json b/tests/integration/inference/recordings/524ead18daaddb6228284820adaa3fb312d2a525cc35e20c181190ddf40793e6.json
new file mode 100644
index 000000000..b33363e45
--- /dev/null
+++ b/tests/integration/inference/recordings/524ead18daaddb6228284820adaa3fb312d2a525cc35e20c181190ddf40793e6.json
@@ -0,0 +1,92 @@
+{
+  "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestOpenAICompatibility::test_openai_format_preserves_complex_schemas[openai_client-txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Process this data"
+        }
+      ],
+      "max_tokens": 4096,
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "process_data",
+            "description": "Process structured data",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "data": {
+                  "$ref": "#/$defs/DataObject"
+                }
+              },
+              "$defs": {
+                "DataObject": {
+                  "type": "object",
+                  "properties": {
+                    "values": {
+                      "type": "array",
+                      "items": {
+                        "type": "number"
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-524ead18daad",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "<think>\nOkay, the user wants me to process the data. Let me check the available tools. There's a function called process_data that takes an object with a 'data' parameter. The data is an array of numbers. But the user hasn't provided any specific data yet. They just said \"Process this data.\" Hmm, maybe they expect me to prompt them for the data first. Wait, maybe there's a misunderstanding. Did they include the data in the conversation history? Let me look back. The user's message is \"Process this data.\" No data provided. Oh, maybe they made a mistake and forgot to include it. I need to ask them to provide the data so I can proceed. Let me confirm if there's any data mentioned. No, the current input is just the instruction. So I should ask the user to supply the data array of numbers to process.\n</think>\n\nPlease provide the structured data you'd like me to process. For example, an array of numbers like `[1, 2, 3]`.",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [],
+              "reasoning_content": null
+            },
+            "stop_reason": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 212,
+          "prompt_tokens": 180,
+          "total_tokens": 392,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "prompt_logprobs": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/65eba1be095a7037c4f197f4168b310ebc8afc00aba3946ba498abe2fdbe6a63.json b/tests/integration/inference/recordings/65eba1be095a7037c4f197f4168b310ebc8afc00aba3946ba498abe2fdbe6a63.json
new file mode 100644
index 000000000..a20b23fa2
--- /dev/null
+++ b/tests/integration/inference/recordings/65eba1be095a7037c4f197f4168b310ebc8afc00aba3946ba498abe2fdbe6a63.json
@@ -0,0 +1,2113 @@
+{
+  "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestStreamingWithTools::test_streaming_tool_calls[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What time is it in UTC?"
+        }
+      ],
+      "max_tokens": 4096,
+      "stream": true,
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "get_time",
+            "description": "Get current time",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "timezone": {
+                  "type": "string"
+                }
+              }
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "",
+                "function_call": null,
+                "refusal": null,
+                "role": "assistant",
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "<think>",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "\n",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "Okay",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": ",",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " user",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " is",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " asking",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " for",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " current",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " time",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " in",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " UTC",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": ".",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " Let",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " me",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " check",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " tools",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " available",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": ".",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " There",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "'s",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " a",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " function",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " called",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " get",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "_time",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " that",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " takes",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " a",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " timezone",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " parameter",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": ".",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " Since",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " UTC",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " is",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " standard",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " time",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " zone",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": ",",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " I",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " need",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " to",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " specify",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " that",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": ".",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " So",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " I",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "'ll",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " call",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " get",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "_time",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " with",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " timezone",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " set",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " to",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " \"",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "UTC",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "\".",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " That",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " should",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " retrieve",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " the",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " time",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": " correctly",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": ".\n",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "</think>",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "\n\n",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": "chatcmpl-tool-41faa6bedd074d51a6335cd2447deeab",
+                    "function": {
+                      "arguments": null,
+                      "name": "get_time"
+                    },
+                    "type": "function"
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "{\"timezone\": \"",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "UTC",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": null,
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": [
+                  {
+                    "index": 0,
+                    "id": null,
+                    "function": {
+                      "arguments": "\"}",
+                      "name": null
+                    },
+                    "type": null
+                  }
+                ]
+              },
+              "finish_reason": null,
+              "index": 0,
+              "logprobs": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      },
+      {
+        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+        "__data__": {
+          "id": "rec-65eba1be095a",
+          "choices": [
+            {
+              "delta": {
+                "content": "",
+                "function_call": null,
+                "refusal": null,
+                "role": null,
+                "tool_calls": null
+              },
+              "finish_reason": "tool_calls",
+              "index": 0,
+              "logprobs": null,
+              "stop_reason": null
+            }
+          ],
+          "created": 0,
+          "model": "Qwen/Qwen3-0.6B",
+          "object": "chat.completion.chunk",
+          "service_tier": null,
+          "system_fingerprint": null,
+          "usage": null
+        }
+      }
+    ],
+    "is_streaming": true
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/744052775cf90e30dac587e6b809d41a8cc37adc29c500eecee2727f428cbf5a.json b/tests/integration/inference/recordings/744052775cf90e30dac587e6b809d41a8cc37adc29c500eecee2727f428cbf5a.json
new file mode 100644
index 000000000..539668be7
--- /dev/null
+++ b/tests/integration/inference/recordings/744052775cf90e30dac587e6b809d41a8cc37adc29c500eecee2727f428cbf5a.json
@@ -0,0 +1,98 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tools[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "system",
+          "content": "Pretend you are a weather assistant."
+        },
+        {
+          "role": "user",
+          "content": "What's the weather like in San Francisco, CA?"
+        }
+      ],
+      "max_tokens": 4096,
+      "stream": false,
+      "tool_choice": "auto",
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "get_weather",
+            "description": "Get the current weather",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "The city and state (both required), e.g. San Francisco, CA."
+                }
+              },
+              "required": [
+                "location"
+              ]
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-744052775cf9",
+        "choices": [
+          {
+            "finish_reason": "tool_calls",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "<think>\nOkay, the user is asking about the weather in San Francisco, CA. I need to use the get_weather function. The function requires the location parameter, which is provided as San Francisco, CA. I should make sure to format the arguments correctly as a JSON object. Let me check the required parameters again. The location is required, so I can't omit it. I'll structure the tool call with the name \"get_weather\" and the arguments including \"location\": \"San Francisco, CA\". That should get the current weather information for the user.\n</think>\n\n",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [
+                {
+                  "id": "chatcmpl-tool-b59dc311dd914d3dbd6d455b122bc39c",
+                  "function": {
+                    "arguments": "{\"location\": \"San Francisco, CA\"}",
+                    "name": "get_weather"
+                  },
+                  "type": "function"
+                }
+              ],
+              "reasoning_content": null
+            },
+            "stop_reason": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 138,
+          "prompt_tokens": 185,
+          "total_tokens": 323,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "prompt_logprobs": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/77cf218283607bfac37623e1bb4e6f33cae670df7d6995d432bca34c5dfb0e43.json b/tests/integration/inference/recordings/77cf218283607bfac37623e1bb4e6f33cae670df7d6995d432bca34c5dfb0e43.json
new file mode 100644
index 000000000..05b4e2609
--- /dev/null
+++ b/tests/integration/inference/recordings/77cf218283607bfac37623e1bb4e6f33cae670df7d6995d432bca34c5dfb0e43.json
@@ -0,0 +1,67 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_with_tool_choice_none[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:tool_calling]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "system",
+          "content": "Pretend you are a weather assistant."
+        },
+        {
+          "role": "user",
+          "content": "What's the weather like in San Francisco, CA?"
+        }
+      ],
+      "max_tokens": 4096,
+      "stream": false
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-77cf21828360",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "<think>\nOkay, the user is asking about the weather in San Francisco, CA. I need to check the current weather conditions. But wait, I can't access real-time data. I should mention that I can't provide the current weather forecast and ask them to check a reliable source like the National Weather Service or a weather app. Also, maybe suggest they can provide more details if they need help with something else related to the weather.\n</think>\n\nI'm sorry, but I can't provide real-time weather information. However, you can check the current weather for San Francisco, CA using the National Weather Service (NWS) website, weather apps like Weather.com, or local meteorological services. Let me know if there's anything else I can assist with!",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [],
+              "reasoning_content": null
+            },
+            "stop_reason": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 154,
+          "prompt_tokens": 33,
+          "total_tokens": 187,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "prompt_logprobs": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/853f6a700b98d71d390b7d366e27133a22772fbdf11863158349c1b0625bbc72.json b/tests/integration/inference/recordings/853f6a700b98d71d390b7d366e27133a22772fbdf11863158349c1b0625bbc72.json
new file mode 100644
index 000000000..2f6e4d3fc
--- /dev/null
+++ b/tests/integration/inference/recordings/853f6a700b98d71d390b7d366e27133a22772fbdf11863158349c1b0625bbc72.json
@@ -0,0 +1,128 @@
+{
+  "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestEdgeCases::test_multiple_tools_with_different_schemas[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Use one of the available tools"
+        }
+      ],
+      "max_tokens": 4096,
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "simple",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "x": {
+                  "type": "string"
+                }
+              }
+            }
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "name": "complex",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "data": {
+                  "$ref": "#/$defs/Complex"
+                }
+              },
+              "$defs": {
+                "Complex": {
+                  "type": "object",
+                  "properties": {
+                    "nested": {
+                      "type": "array",
+                      "items": {
+                        "type": "number"
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        },
+        {
+          "type": "function",
+          "function": {
+            "name": "with_output",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "input": {
+                  "type": "string"
+                }
+              }
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-853f6a700b98",
+        "choices": [
+          {
+            "finish_reason": "tool_calls",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "<think>\nOkay, let's see. The user wants me to use one of the available tools. The tools provided are simple, complex, and with_output. The simple function takes an argument 'x' of type string. The complex function requires a 'data' parameter that's an object with a nested array of numbers. The with_output function takes an input string.\n\nThe user's query is about using a tool, but there's no specific function name mentioned. Wait, maybe the user expects me to choose one based on the context. Since the tools are available, but the query is a general instruction, perhaps I should ask for clarification. However, the instructions say to use one of the tools if possible. Since the user hasn't specified a particular function, maybe I should check if there's any implicit function needed. But looking at the tools, none are directly related to the query. The user might need to specify which tool to use. Alternatively, maybe the answer is to call the simple function with an example input. But without more context, it's hard to tell. Wait, maybe the user expects me to choose the simplest one. Let's go with the simple function first. So the tool call would be to the simple function with x set to some value. But the user hasn't provided a specific value. Maybe I should state that the tool requires a value. But according to the instructions, if possible, use one of the tools. Since the user hasn't given a value, perhaps the answer is to call the simple function with an example. But the parameters for the simple function require 'x' which is a string. Maybe the user expects me to proceed without needing more info. So I'll proceed by calling the simple function with x as \"example\".\n</think>\n\n",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [
+                {
+                  "id": "chatcmpl-tool-12e2ba0189cf484bb936cbb254a5c32a",
+                  "function": {
+                    "arguments": "{\"x\": \"example\"}",
+                    "name": "simple"
+                  },
+                  "type": "function"
+                }
+              ],
+              "reasoning_content": null
+            },
+            "stop_reason": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 378,
+          "prompt_tokens": 265,
+          "total_tokens": 643,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "prompt_logprobs": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/cfb292c0f41dbc4a2c0fb39016760f6c23c248a7bbffea19ac6bcab7bf25292d.json b/tests/integration/inference/recordings/cfb292c0f41dbc4a2c0fb39016760f6c23c248a7bbffea19ac6bcab7bf25292d.json
new file mode 100644
index 000000000..fdec100bb
--- /dev/null
+++ b/tests/integration/inference/recordings/cfb292c0f41dbc4a2c0fb39016760f6c23c248a7bbffea19ac6bcab7bf25292d.json
@@ -0,0 +1,114 @@
+{
+  "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestChatCompletionWithTools::test_tool_with_complex_schema[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Book a flight from SFO to JFK for John Doe"
+        }
+      ],
+      "max_tokens": 4096,
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "book_flight",
+            "description": "Book a flight",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "flight": {
+                  "$ref": "#/$defs/FlightInfo"
+                },
+                "passenger": {
+                  "$ref": "#/$defs/Passenger"
+                }
+              },
+              "required": [
+                "flight",
+                "passenger"
+              ],
+              "$defs": {
+                "FlightInfo": {
+                  "type": "object",
+                  "properties": {
+                    "from": {
+                      "type": "string"
+                    },
+                    "to": {
+                      "type": "string"
+                    },
+                    "date": {
+                      "type": "string",
+                      "format": "date"
+                    }
+                  }
+                },
+                "Passenger": {
+                  "type": "object",
+                  "properties": {
+                    "name": {
+                      "type": "string"
+                    },
+                    "age": {
+                      "type": "integer"
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-cfb292c0f41d",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "<think>\nOkay, the user wants to book a flight from SFO to JFK for John Doe. Let me check the tools available. The provided function is book_flight, which requires flight information and a passenger. The parameters needed are flight (as a FlightInfo object) and passenger (with name and age). The user mentioned SFO to JFK, so the flight details are from and to. The passenger's name is John Doe, but the age isn't provided. Wait, the function parameters require the passenger's name and age, but the user only mentioned the name. Maybe the age is missing? But the user didn't specify it, so perhaps I should note that the age is required. However, the function's required parameters are flight and passenger, so even if age is missing, the function can't be called without it. So I need to include both flight info and passenger details. The user's message only gives the name and destination, not the flight details or age. Therefore, I need to ask for the flight details and the passenger's age. But the user hasn't provided those. So I can't proceed with the function call. Wait, but maybe the user expects me to assume some default? No, the function requires all parameters. Since the user hasn't provided flight details or age, I can't call the function. So the correct response is to prompt the user for those details.\n</think>\n\nThe booking requires the flight details and passenger's age. Could you provide the flight number and John Doe's age?",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [],
+              "reasoning_content": null
+            },
+            "stop_reason": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 310,
+          "prompt_tokens": 261,
+          "total_tokens": 571,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "prompt_logprobs": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/df353403c7fb59ed88c52269261b3dd9b75f681f8bb5431b4f07006d6c08aa7c.json b/tests/integration/inference/recordings/df353403c7fb59ed88c52269261b3dd9b75f681f8bb5431b4f07006d6c08aa7c.json
new file mode 100644
index 000000000..eb6eb8eb2
--- /dev/null
+++ b/tests/integration/inference/recordings/df353403c7fb59ed88c52269261b3dd9b75f681f8bb5431b4f07006d6c08aa7c.json
@@ -0,0 +1,96 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_structured_output[txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:structured_output]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "system",
+          "content": "You are a helpful assistant. Michael Jordan was born in 1963. His first name is \"Michael\", He played basketball for the Chicago Bulls for 15 seasons and was drafted in 1984"
+        },
+        {
+          "role": "user",
+          "content": "Please give me information about Michael Jordan."
+        }
+      ],
+      "max_tokens": 4096,
+      "response_format": {
+        "type": "json_schema",
+        "json_schema": {
+          "name": "AnswerFormat",
+          "schema": {
+            "properties": {
+              "first_name": {
+                "title": "First Name",
+                "type": "string"
+              },
+              "last_name": {
+                "title": "Last Name",
+                "type": "string"
+              },
+              "year_of_birth": {
+                "title": "Year Of Birth",
+                "type": "integer"
+              }
+            },
+            "required": [
+              "first_name",
+              "last_name",
+              "year_of_birth"
+            ],
+            "title": "AnswerFormat",
+            "type": "object"
+          }
+        }
+      },
+      "stream": false
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-df353403c7fb",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [],
+              "reasoning_content": null
+            },
+            "stop_reason": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 28,
+          "prompt_tokens": 66,
+          "total_tokens": 94,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "prompt_logprobs": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/e89112e7735fccc5ad9ebe6a96454953aed0ba2501cabfaa80b742c2bf371cbc.json b/tests/integration/inference/recordings/e89112e7735fccc5ad9ebe6a96454953aed0ba2501cabfaa80b742c2bf371cbc.json
new file mode 100644
index 000000000..856684a55
--- /dev/null
+++ b/tests/integration/inference/recordings/e89112e7735fccc5ad9ebe6a96454953aed0ba2501cabfaa80b742c2bf371cbc.json
@@ -0,0 +1,92 @@
+{
+  "test_id": "tests/integration/inference/test_tools_with_schemas.py::TestChatCompletionWithTools::test_simple_tool_call[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What's the weather in San Francisco?"
+        }
+      ],
+      "max_tokens": 4096,
+      "tools": [
+        {
+          "type": "function",
+          "function": {
+            "name": "get_weather",
+            "description": "Get weather for a location",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                "location": {
+                  "type": "string",
+                  "description": "City name"
+                }
+              },
+              "required": [
+                "location"
+              ]
+            }
+          }
+        }
+      ]
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "rec-e89112e7735f",
+        "choices": [
+          {
+            "finish_reason": "tool_calls",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "<think>\nOkay, the user is asking for the weather in San Francisco. I need to check if there's a function available for that. Looking at the tools provided, there's a function called get_weather that requires a location parameter. The description says it gets weather for a location, and the parameter is the city name. The user provided \"San Francisco\" as the location, so I should call the get_weather function with \"San Francisco\" as the argument. I don't see any other parameters needed here, so the tool call should be straightforward. Just make sure the city name is correctly formatted in JSON.\n</think>\n\n",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": [
+                {
+                  "id": "chatcmpl-tool-feead29842dc40b2831c41ed397f555f",
+                  "function": {
+                    "arguments": "{\"location\": \"San Francisco\"}",
+                    "name": "get_weather"
+                  },
+                  "type": "function"
+                }
+              ],
+              "reasoning_content": null
+            },
+            "stop_reason": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 146,
+          "prompt_tokens": 161,
+          "total_tokens": 307,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "prompt_logprobs": null,
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/f02f1bfd75adaea87b91dedc59430b99015b5ed0e2bbf24418a31146ffcbca9b.json b/tests/integration/inference/recordings/f02f1bfd75adaea87b91dedc59430b99015b5ed0e2bbf24418a31146ffcbca9b.json
new file mode 100644
index 000000000..28f7d8296
--- /dev/null
+++ b/tests/integration/inference/recordings/f02f1bfd75adaea87b91dedc59430b99015b5ed0e2bbf24418a31146ffcbca9b.json
@@ -0,0 +1,53 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/completions",
+    "headers": {},
+    "body": {
+      "model": "Qwen/Qwen3-0.6B",
+      "prompt": "I am feeling really sad today.",
+      "stream": false,
+      "extra_body": {
+        "guided_choice": [
+          "joy",
+          "sadness"
+        ]
+      }
+    },
+    "endpoint": "/v1/completions",
+    "model": "Qwen/Qwen3-0.6B"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.completion.Completion",
+      "__data__": {
+        "id": "rec-f02f1bfd75ad",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "text": "joy",
+            "stop_reason": null,
+            "prompt_logprobs": null
+          }
+        ],
+        "created": 0,
+        "model": "Qwen/Qwen3-0.6B",
+        "object": "text_completion",
+        "system_fingerprint": null,
+        "usage": {
+          "completion_tokens": 2,
+          "prompt_tokens": 7,
+          "total_tokens": 9,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        },
+        "kv_transfer_params": null
+      }
+    },
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-0037f2d2065a360cfcc36c35f138318cfc6508e743ff9423da4b7b1d7bfd4f3f-fb68f5a6.json b/tests/integration/inference/recordings/models-0037f2d2065a360cfcc36c35f138318cfc6508e743ff9423da4b7b1d7bfd4f3f-fb68f5a6.json
new file mode 100644
index 000000000..7256ae75d
--- /dev/null
+++ b/tests/integration/inference/recordings/models-0037f2d2065a360cfcc36c35f138318cfc6508e743ff9423da4b7b1d7bfd4f3f-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375180,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-aeeb49e5e51c42fa94562780165bd620",
+              "object": "model_permission",
+              "created": 1762375180,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-009fb75503cf565d6c97f70deb8235432b0020b93d55e3b33ea093664c4bbc82-fb68f5a6.json b/tests/integration/inference/recordings/models-009fb75503cf565d6c97f70deb8235432b0020b93d55e3b33ea093664c4bbc82-fb68f5a6.json
new file mode 100644
index 000000000..e0f5fa68f
--- /dev/null
+++ b/tests/integration/inference/recordings/models-009fb75503cf565d6c97f70deb8235432b0020b93d55e3b33ea093664c4bbc82-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375115,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-feec0a894be04f738e12b596ff163b64",
+              "object": "model_permission",
+              "created": 1762375115,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-01e6ee9852f532d9b0d82dde2e7c831d698e81dea1be69433050d42643f35edc-fb68f5a6.json b/tests/integration/inference/recordings/models-01e6ee9852f532d9b0d82dde2e7c831d698e81dea1be69433050d42643f35edc-fb68f5a6.json
new file mode 100644
index 000000000..0eb0a26a6
--- /dev/null
+++ b/tests/integration/inference/recordings/models-01e6ee9852f532d9b0d82dde2e7c831d698e81dea1be69433050d42643f35edc-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_stop_sequence[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:stop_sequence]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374330,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-119e17052e4c4c13bd791af3138d5360",
+              "object": "model_permission",
+              "created": 1762374330,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-10370bf5307b2fc971b8e53bdcc4e9eb4d3d76fe8ecdb31231b59576a612e972-fb68f5a6.json b/tests/integration/inference/recordings/models-10370bf5307b2fc971b8e53bdcc4e9eb4d3d76fe8ecdb31231b59576a612e972-fb68f5a6.json
new file mode 100644
index 000000000..dc7c97f4f
--- /dev/null
+++ b/tests/integration/inference/recordings/models-10370bf5307b2fc971b8e53bdcc4e9eb4d3d76fe8ecdb31231b59576a612e972-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375226,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-c6ae673fda084519b3c67947896cd3b0",
+              "object": "model_permission",
+              "created": 1762375226,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-1312e0d8579e9b0e6dcb222272de34115277db71c6c560872fa13722197f881f-fb68f5a6.json b/tests/integration/inference/recordings/models-1312e0d8579e9b0e6dcb222272de34115277db71c6c560872fa13722197f881f-fb68f5a6.json
new file mode 100644
index 000000000..833003741
--- /dev/null
+++ b/tests/integration/inference/recordings/models-1312e0d8579e9b0e6dcb222272de34115277db71c6c560872fa13722197f881f-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374573,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-3f422354a81e491b87f93d5b192a0e1a",
+              "object": "model_permission",
+              "created": 1762374573,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-134e731d073e9e07eb9782bbe292167f8ad08157a15150ce92135854d04050fc-fb68f5a6.json b/tests/integration/inference/recordings/models-134e731d073e9e07eb9782bbe292167f8ad08157a15150ce92135854d04050fc-fb68f5a6.json
new file mode 100644
index 000000000..df660a0f1
--- /dev/null
+++ b/tests/integration/inference/recordings/models-134e731d073e9e07eb9782bbe292167f8ad08157a15150ce92135854d04050fc-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374305,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-794e16e59ddb4216a8bedfdf485b8f24",
+              "object": "model_permission",
+              "created": 1762374305,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-14c0905df1b177d2f85b30b0285b0ffdc88d1a7b290e2155fb7a01f3c1436ca0-fb68f5a6.json b/tests/integration/inference/recordings/models-14c0905df1b177d2f85b30b0285b0ffdc88d1a7b290e2155fb7a01f3c1436ca0-fb68f5a6.json
new file mode 100644
index 000000000..fed71ffa7
--- /dev/null
+++ b/tests/integration/inference/recordings/models-14c0905df1b177d2f85b30b0285b0ffdc88d1a7b290e2155fb7a01f3c1436ca0-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=vllm/Qwen/Qwen3-0.6B-True]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374317,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-ff7d26d076eb4373a0631a80fe3ae063",
+              "object": "model_permission",
+              "created": 1762374317,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-1bc879637162ba23badeea66c4c25a638869a3e90d16ef3e84dea1a613e7192e-fb68f5a6.json b/tests/integration/inference/recordings/models-1bc879637162ba23badeea66c4c25a638869a3e90d16ef3e84dea1a613e7192e-fb68f5a6.json
new file mode 100644
index 000000000..9a532d386
--- /dev/null
+++ b/tests/integration/inference/recordings/models-1bc879637162ba23badeea66c4c25a638869a3e90d16ef3e84dea1a613e7192e-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375033,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-2a16fede981b43be9e1cbe3dbedd1e74",
+              "object": "model_permission",
+              "created": 1762375033,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-2b9bac5da1a03c0b572bc019cc0c50904d49e6193990ca245908f4535bcaab43-fb68f5a6.json b/tests/integration/inference/recordings/models-2b9bac5da1a03c0b572bc019cc0c50904d49e6193990ca245908f4535bcaab43-fb68f5a6.json
new file mode 100644
index 000000000..ab3269b57
--- /dev/null
+++ b/tests/integration/inference/recordings/models-2b9bac5da1a03c0b572bc019cc0c50904d49e6193990ca245908f4535bcaab43-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374297,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-4bc93704559a4e1d8492aeec7222040c",
+              "object": "model_permission",
+              "created": 1762374297,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-394c30370fe5b724c5fe1292984373b281d47b2ac0d49e8b598f13cf100b3ad8-fb68f5a6.json b/tests/integration/inference/recordings/models-394c30370fe5b724c5fe1292984373b281d47b2ac0d49e8b598f13cf100b3ad8-fb68f5a6.json
new file mode 100644
index 000000000..8237cc20c
--- /dev/null
+++ b/tests/integration/inference/recordings/models-394c30370fe5b724c5fe1292984373b281d47b2ac0d49e8b598f13cf100b3ad8-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374532,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-e353aa079d5145c19953791ac99daeba",
+              "object": "model_permission",
+              "created": 1762374532,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-3f4208962fdb2be3e7057777fc93a149890bd1dfa8a92597e176f23658e86cd8-fb68f5a6.json b/tests/integration/inference/recordings/models-3f4208962fdb2be3e7057777fc93a149890bd1dfa8a92597e176f23658e86cd8-fb68f5a6.json
new file mode 100644
index 000000000..14b37fb0a
--- /dev/null
+++ b/tests/integration/inference/recordings/models-3f4208962fdb2be3e7057777fc93a149890bd1dfa8a92597e176f23658e86cd8-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-True]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375260,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-10c27d1c9e324b18b65321b422e19af9",
+              "object": "model_permission",
+              "created": 1762375260,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-4a729b00af209ad60846d1904e5973ad081aa5f595de50f5ef1aae304cb67ef3-fb68f5a6.json b/tests/integration/inference/recordings/models-4a729b00af209ad60846d1904e5973ad081aa5f595de50f5ef1aae304cb67ef3-fb68f5a6.json
new file mode 100644
index 000000000..4af25e17a
--- /dev/null
+++ b/tests/integration/inference/recordings/models-4a729b00af209ad60846d1904e5973ad081aa5f595de50f5ef1aae304cb67ef3-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375040,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-f01c211577294936958dd28046c89dba",
+              "object": "model_permission",
+              "created": 1762375040,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-4f62bcb9cdf74f4c2ed804038def162f18ad384182b0f174918607e9ed3c1515-fb68f5a6.json b/tests/integration/inference/recordings/models-4f62bcb9cdf74f4c2ed804038def162f18ad384182b0f174918607e9ed3c1515-fb68f5a6.json
new file mode 100644
index 000000000..54b48967b
--- /dev/null
+++ b/tests/integration/inference/recordings/models-4f62bcb9cdf74f4c2ed804038def162f18ad384182b0f174918607e9ed3c1515-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_02]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375266,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-7166a6fcd331435eb2d0f0a6b23382ed",
+              "object": "model_permission",
+              "created": 1762375266,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-54b420cdb98a0149a618088f55746e26b7bf6e7c5ebf5fa07c13ec9e366521d3-fb68f5a6.json b/tests/integration/inference/recordings/models-54b420cdb98a0149a618088f55746e26b7bf6e7c5ebf5fa07c13ec9e366521d3-fb68f5a6.json
new file mode 100644
index 000000000..30a33793b
--- /dev/null
+++ b/tests/integration/inference/recordings/models-54b420cdb98a0149a618088f55746e26b7bf6e7c5ebf5fa07c13ec9e366521d3-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374301,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-cd16b092c5a04e719ddf786f0c3e935e",
+              "object": "model_permission",
+              "created": 1762374301,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-613f5d11a8cda7126115f96650334fde0a0457a6b4a2605bc15eec9b50a6956c-fb68f5a6.json b/tests/integration/inference/recordings/models-613f5d11a8cda7126115f96650334fde0a0457a6b4a2605bc15eec9b50a6956c-fb68f5a6.json
new file mode 100644
index 000000000..4193dce51
--- /dev/null
+++ b/tests/integration/inference/recordings/models-613f5d11a8cda7126115f96650334fde0a0457a6b4a2605bc15eec9b50a6956c-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:sanity]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374295,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-9f71adbb206846bb9d0e12834e41551e",
+              "object": "model_permission",
+              "created": 1762374295,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-62a361f55d61a98ea0863e9acfb5ab5d540c5d19e791415ee476474f7f1ed90f-fb68f5a6.json b/tests/integration/inference/recordings/models-62a361f55d61a98ea0863e9acfb5ab5d540c5d19e791415ee476474f7f1ed90f-fb68f5a6.json
new file mode 100644
index 000000000..d5916fff7
--- /dev/null
+++ b/tests/integration/inference/recordings/models-62a361f55d61a98ea0863e9acfb5ab5d540c5d19e791415ee476474f7f1ed90f-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_logprobs_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:log_probs]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374342,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-a8b7b38c40584a03b4b346b6c181fb93",
+              "object": "model_permission",
+              "created": 1762374342,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-668fc72f70ac72d5c112fe79d86d5c790611456b3f0102832f27e6edd420ab54-fb68f5a6.json b/tests/integration/inference/recordings/models-668fc72f70ac72d5c112fe79d86d5c790611456b3f0102832f27e6edd420ab54-fb68f5a6.json
new file mode 100644
index 000000000..1542aa0cf
--- /dev/null
+++ b/tests/integration/inference/recordings/models-668fc72f70ac72d5c112fe79d86d5c790611456b3f0102832f27e6edd420ab54-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_01]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375235,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-dd48560646f141298f5cc2ef3467e54b",
+              "object": "model_permission",
+              "created": 1762375235,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-702eee4572e9b17ff0b0fdd55b10021f7077f0afcba922d6a53db0b537542518-fb68f5a6.json b/tests/integration/inference/recordings/models-702eee4572e9b17ff0b0fdd55b10021f7077f0afcba922d6a53db0b537542518-fb68f5a6.json
new file mode 100644
index 000000000..fed0e7785
--- /dev/null
+++ b/tests/integration/inference/recordings/models-702eee4572e9b17ff0b0fdd55b10021f7077f0afcba922d6a53db0b537542518-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374500,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-0ba0c3a54dcb4e57bc0308fd54425933",
+              "object": "model_permission",
+              "created": 1762374500,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-723d37a5bceab199cff076a0dcc2d4ee7596b7c800f13c64f6a6ecdbf4ed2f3a-fb68f5a6.json b/tests/integration/inference/recordings/models-723d37a5bceab199cff076a0dcc2d4ee7596b7c800f13c64f6a6ecdbf4ed2f3a-fb68f5a6.json
new file mode 100644
index 000000000..f53aadba5
--- /dev/null
+++ b/tests/integration/inference/recordings/models-723d37a5bceab199cff076a0dcc2d4ee7596b7c800f13c64f6a6ecdbf4ed2f3a-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=vllm/Qwen/Qwen3-0.6B-True]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374311,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-e95a9ed7439245b5995add97fb50f765",
+              "object": "model_permission",
+              "created": 1762374311,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-768c497339830cf86ddd7843f33d0ed06b3bce3ef2ae9f854364b534ba8cafb7-fb68f5a6.json b/tests/integration/inference/recordings/models-768c497339830cf86ddd7843f33d0ed06b3bce3ef2ae9f854364b534ba8cafb7-fb68f5a6.json
new file mode 100644
index 000000000..5f5d13e94
--- /dev/null
+++ b/tests/integration/inference/recordings/models-768c497339830cf86ddd7843f33d0ed06b3bce3ef2ae9f854364b534ba8cafb7-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375099,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-6b5eba46536f43df902871dd257e1676",
+              "object": "model_permission",
+              "created": 1762375099,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-7b0f2493d699e58cdfe0a9dab38f4423771c8ebced2020b1e15cbb35470c1ca2-fb68f5a6.json b/tests/integration/inference/recordings/models-7b0f2493d699e58cdfe0a9dab38f4423771c8ebced2020b1e15cbb35470c1ca2-fb68f5a6.json
new file mode 100644
index 000000000..69b91db75
--- /dev/null
+++ b/tests/integration/inference/recordings/models-7b0f2493d699e58cdfe0a9dab38f4423771c8ebced2020b1e15cbb35470c1ca2-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_base64[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375207,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-bbfbcf20cac146e0ae5e45ae6a42632d",
+              "object": "model_permission",
+              "created": 1762375207,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-7ed97509ff199eabe1380caa36b9e5934e9d04a9cafcfa2d21d20f6f85679ae4-fb68f5a6.json b/tests/integration/inference/recordings/models-7ed97509ff199eabe1380caa36b9e5934e9d04a9cafcfa2d21d20f6f85679ae4-fb68f5a6.json
new file mode 100644
index 000000000..d6d7b81a0
--- /dev/null
+++ b/tests/integration/inference/recordings/models-7ed97509ff199eabe1380caa36b9e5934e9d04a9cafcfa2d21d20f6f85679ae4-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375273,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-4935d35e00fd4acdbe78662f42342e77",
+              "object": "model_permission",
+              "created": 1762375273,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-805e6b510b1ab33505a1af85c0d2a766cd3415512212d80f6292ca0ef5c359e1-fb68f5a6.json b/tests/integration/inference/recordings/models-805e6b510b1ab33505a1af85c0d2a766cd3415512212d80f6292ca0ef5c359e1-fb68f5a6.json
new file mode 100644
index 000000000..1e7cb92bf
--- /dev/null
+++ b/tests/integration/inference/recordings/models-805e6b510b1ab33505a1af85c0d2a766cd3415512212d80f6292ca0ef5c359e1-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_base64_batch_processing[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374591,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-e19031997a1e44d99c8b5ae55725a887",
+              "object": "model_permission",
+              "created": 1762374591,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-860b7e8309e0761e20e845be75c0a28d759384a367f6308f2a921702318a5dba-fb68f5a6.json b/tests/integration/inference/recordings/models-860b7e8309e0761e20e845be75c0a28d759384a367f6308f2a921702318a5dba-fb68f5a6.json
new file mode 100644
index 000000000..2e4cb4cb0
--- /dev/null
+++ b/tests/integration/inference/recordings/models-860b7e8309e0761e20e845be75c0a28d759384a367f6308f2a921702318a5dba-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_02]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375027,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-ec44b40a73b04912a837001376b59cff",
+              "object": "model_permission",
+              "created": 1762375027,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-8903569d538f9836ac6251d90c4668d3057e8e0ced847a08fd7a6faedb5710c3-fb68f5a6.json b/tests/integration/inference/recordings/models-8903569d538f9836ac6251d90c4668d3057e8e0ced847a08fd7a6faedb5710c3-fb68f5a6.json
new file mode 100644
index 000000000..4ab79796c
--- /dev/null
+++ b/tests/integration/inference/recordings/models-8903569d538f9836ac6251d90c4668d3057e8e0ced847a08fd7a6faedb5710c3-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374356,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-3203232f1dbd426aba98ef1593dd3c01",
+              "object": "model_permission",
+              "created": 1762374356,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-8aa8c593dd64639678c294146fd56804393856c6e85197e6317ebd88351be21d-fb68f5a6.json b/tests/integration/inference/recordings/models-8aa8c593dd64639678c294146fd56804393856c6e85197e6317ebd88351be21d-fb68f5a6.json
new file mode 100644
index 000000000..f2c124b73
--- /dev/null
+++ b/tests/integration/inference/recordings/models-8aa8c593dd64639678c294146fd56804393856c6e85197e6317ebd88351be21d-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375248,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-5efe67a621074e979edaaf8fcfee9a80",
+              "object": "model_permission",
+              "created": 1762375248,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-8fc4c7b563b9bd423b74dcb4683039248f41d86c02703bd2dce845d972c9ae6f-fb68f5a6.json b/tests/integration/inference/recordings/models-8fc4c7b563b9bd423b74dcb4683039248f41d86c02703bd2dce845d972c9ae6f-fb68f5a6.json
new file mode 100644
index 000000000..0b5b6e4ed
--- /dev/null
+++ b/tests/integration/inference/recordings/models-8fc4c7b563b9bd423b74dcb4683039248f41d86c02703bd2dce845d972c9ae6f-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_user_parameter[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375135,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-5509cf924e5e4fc89091e4593f264258",
+              "object": "model_permission",
+              "created": 1762375135,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-99ae704b53e3e3150cac5cd579e446e6545a4ab6a63048ce00ee1fbe5fbf1b4e-fb68f5a6.json b/tests/integration/inference/recordings/models-99ae704b53e3e3150cac5cd579e446e6545a4ab6a63048ce00ee1fbe5fbf1b4e-fb68f5a6.json
new file mode 100644
index 000000000..f8ded3ee4
--- /dev/null
+++ b/tests/integration/inference/recordings/models-99ae704b53e3e3150cac5cd579e446e6545a4ab6a63048ce00ee1fbe5fbf1b4e-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[openai_client-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:non_streaming_01]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374301,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-72ed55b56df1471b9f71c48bacf8b768",
+              "object": "model_permission",
+              "created": 1762374301,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-9a36a281899f0800f085473f5f0185b09a02022607b965ac08b4db2e9e7eabc9-fb68f5a6.json b/tests/integration/inference/recordings/models-9a36a281899f0800f085473f5f0185b09a02022607b965ac08b4db2e9e7eabc9-fb68f5a6.json
new file mode 100644
index 000000000..32ecdf0b6
--- /dev/null
+++ b/tests/integration/inference/recordings/models-9a36a281899f0800f085473f5f0185b09a02022607b965ac08b4db2e9e7eabc9-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming_suffix[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:suffix]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374295,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-abbbfbb49abc4312b2b2011d4d2ba19b",
+              "object": "model_permission",
+              "created": 1762374295,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-9beac41c66cbe8568bb72b5ba0f5608597ef8a14b42585c22b1e7c45526537c1-fb68f5a6.json b/tests/integration/inference/recordings/models-9beac41c66cbe8568bb72b5ba0f5608597ef8a14b42585c22b1e7c45526537c1-fb68f5a6.json
new file mode 100644
index 000000000..454b7223f
--- /dev/null
+++ b/tests/integration/inference/recordings/models-9beac41c66cbe8568bb72b5ba0f5608597ef8a14b42585c22b1e7c45526537c1-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_single_string[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375065,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-d943fbda14264715906334300853cec7",
+              "object": "model_permission",
+              "created": 1762375065,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-a495ae010d48bb3649c822e3299e819c164c2311db231c81296ff4c72e6f81cb-fb68f5a6.json b/tests/integration/inference/recordings/models-a495ae010d48bb3649c822e3299e819c164c2311db231c81296ff4c72e6f81cb-fb68f5a6.json
new file mode 100644
index 000000000..1eded64dd
--- /dev/null
+++ b/tests/integration/inference/recordings/models-a495ae010d48bb3649c822e3299e819c164c2311db231c81296ff4c72e6f81cb-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming_with_file[txt=vllm/Qwen/Qwen3-0.6B]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374323,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-15a0a1106fff4fdd8ce7574373fe3cee",
+              "object": "model_permission",
+              "created": 1762374323,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-a77b3cb7370fd9f46e6ea12d72e1d9a8e7515f745289e93e5eb4a21d0e7b71b7-fb68f5a6.json b/tests/integration/inference/recordings/models-a77b3cb7370fd9f46e6ea12d72e1d9a8e7515f745289e93e5eb4a21d0e7b71b7-fb68f5a6.json
new file mode 100644
index 000000000..9501f622a
--- /dev/null
+++ b/tests/integration/inference/recordings/models-a77b3cb7370fd9f46e6ea12d72e1d9a8e7515f745289e93e5eb4a21d0e7b71b7-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375082,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-ef1d3bc6fefc432380ef0eabdf216fd3",
+              "object": "model_permission",
+              "created": 1762375082,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-a82e913e058618dcb30b269a54d4e6a9cb1e0017a42efe04480874fe957194d4-fb68f5a6.json b/tests/integration/inference/recordings/models-a82e913e058618dcb30b269a54d4e6a9cb1e0017a42efe04480874fe957194d4-fb68f5a6.json
new file mode 100644
index 000000000..6558ad3c4
--- /dev/null
+++ b/tests/integration/inference/recordings/models-a82e913e058618dcb30b269a54d4e6a9cb1e0017a42efe04480874fe957194d4-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_invalid_model_error[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375165,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-b29f7386725b4f13976cd76b6dc3a278",
+              "object": "model_permission",
+              "created": 1762375165,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-bee98cb55c3b74854d0bb71b23b7e01bbb9f1580b413a26dc3afbf9da8b7d995-fb68f5a6.json b/tests/integration/inference/recordings/models-bee98cb55c3b74854d0bb71b23b7e01bbb9f1580b413a26dc3afbf9da8b7d995-fb68f5a6.json
new file mode 100644
index 000000000..dc3944bb1
--- /dev/null
+++ b/tests/integration/inference/recordings/models-bee98cb55c3b74854d0bb71b23b7e01bbb9f1580b413a26dc3afbf9da8b7d995-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_logprobs[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:log_probs]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374336,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-21db8cc1a31e41eaaa4e653435618645",
+              "object": "model_permission",
+              "created": 1762374336,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-c3d9f0302c09cecba4c3797ec2d65e358910e6194e13d1001fd3567ab2eff6aa-fb68f5a6.json b/tests/integration/inference/recordings/models-c3d9f0302c09cecba4c3797ec2d65e358910e6194e13d1001fd3567ab2eff6aa-fb68f5a6.json
new file mode 100644
index 000000000..b81f6ed60
--- /dev/null
+++ b/tests/integration/inference/recordings/models-c3d9f0302c09cecba4c3797ec2d65e358910e6194e13d1001fd3567ab2eff6aa-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_different_inputs_different_outputs[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374547,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-09fed2c5660e42658ab23c6d17b7840c",
+              "object": "model_permission",
+              "created": 1762374547,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-c6e251660301fe3f503b4c31dcb551087ca9118e65b97bd894954847723a9be0-fb68f5a6.json b/tests/integration/inference/recordings/models-c6e251660301fe3f503b4c31dcb551087ca9118e65b97bd894954847723a9be0-fb68f5a6.json
new file mode 100644
index 000000000..03f377561
--- /dev/null
+++ b/tests/integration/inference/recordings/models-c6e251660301fe3f503b4c31dcb551087ca9118e65b97bd894954847723a9be0-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_streaming[txt=vllm/Qwen/Qwen3-0.6B-inference:completion:sanity]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374297,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-70d68a901d2445f6b7f470c600b34c78",
+              "object": "model_permission",
+              "created": 1762374297,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-cb1f7d5cd412fddb3395ef125bbcdac95c85585f23684e71abf142004b164bbc-fb68f5a6.json b/tests/integration/inference/recordings/models-cb1f7d5cd412fddb3395ef125bbcdac95c85585f23684e71abf142004b164bbc-fb68f5a6.json
new file mode 100644
index 000000000..2d1759b41
--- /dev/null
+++ b/tests/integration/inference/recordings/models-cb1f7d5cd412fddb3395ef125bbcdac95c85585f23684e71abf142004b164bbc-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[openai_client-txt=vllm/Qwen/Qwen3-0.6B-False]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375047,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-a8446fd6718649399402526dc6fe1477",
+              "object": "model_permission",
+              "created": 1762375047,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-cbecbec285766025f2bebca94904e63578190f33b47eb6f32cb4635a1b43e3cf-fb68f5a6.json b/tests/integration/inference/recordings/models-cbecbec285766025f2bebca94904e63578190f33b47eb6f32cb4635a1b43e3cf-fb68f5a6.json
new file mode 100644
index 000000000..bac4b8cb4
--- /dev/null
+++ b/tests/integration/inference/recordings/models-cbecbec285766025f2bebca94904e63578190f33b47eb6f32cb4635a1b43e3cf-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-True]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375254,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-f6eb51901e6443e492061deac904737c",
+              "object": "model_permission",
+              "created": 1762375254,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-d650458718dae3a10405ce1d241f0e1ceeeae8edf516cf10c611edcdf64035e3-fb68f5a6.json b/tests/integration/inference/recordings/models-d650458718dae3a10405ce1d241f0e1ceeeae8edf516cf10c611edcdf64035e3-fb68f5a6.json
new file mode 100644
index 000000000..89aef16d5
--- /dev/null
+++ b/tests/integration/inference/recordings/models-d650458718dae3a10405ce1d241f0e1ceeeae8edf516cf10c611edcdf64035e3-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming_with_n[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_02]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375279,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-31e50ba39ad84a7daa1a24a3c77dc550",
+              "object": "model_permission",
+              "created": 1762375279,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-d8acc76e3d1b54eac9754a9d3a72c571fe3078b227a257aa15afdba946b69665-fb68f5a6.json b/tests/integration/inference/recordings/models-d8acc76e3d1b54eac9754a9d3a72c571fe3078b227a257aa15afdba946b69665-fb68f5a6.json
new file mode 100644
index 000000000..a9b4bf369
--- /dev/null
+++ b/tests/integration/inference/recordings/models-d8acc76e3d1b54eac9754a9d3a72c571fe3078b227a257aa15afdba946b69665-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-inference:chat_completion:streaming_01]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375241,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-3ebcc379347541ea94de0f91838829e5",
+              "object": "model_permission",
+              "created": 1762375241,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-d9ff5f5ffaa7a64101936007fbe61cf2ed54f67609b54b56d92cb949234e3799-fb68f5a6.json b/tests/integration/inference/recordings/models-d9ff5f5ffaa7a64101936007fbe61cf2ed54f67609b54b56d92cb949234e3799-fb68f5a6.json
new file mode 100644
index 000000000..4bd1dde93
--- /dev/null
+++ b/tests/integration/inference/recordings/models-d9ff5f5ffaa7a64101936007fbe61cf2ed54f67609b54b56d92cb949234e3799-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_multiple_strings[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374449,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-311f880045284a469a286b8039177d10",
+              "object": "model_permission",
+              "created": 1762374449,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-da380037dc0fe8ae61b838baf268e616057e46f8424df0a9b52f94e48cef4a7f-fb68f5a6.json b/tests/integration/inference/recordings/models-da380037dc0fe8ae61b838baf268e616057e46f8424df0a9b52f94e48cef4a7f-fb68f5a6.json
new file mode 100644
index 000000000..fa5bddb15
--- /dev/null
+++ b/tests/integration/inference/recordings/models-da380037dc0fe8ae61b838baf268e616057e46f8424df0a9b52f94e48cef4a7f-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=vllm/Qwen/Qwen3-0.6B-False]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375053,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-2e52800baf7e4d3389892f33feb3f52b",
+              "object": "model_permission",
+              "created": 1762375053,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-e42ca9261e3cee9c877322a51791ab6f113478170f8a21cd0a971c53b330e999-fb68f5a6.json b/tests/integration/inference/recordings/models-e42ca9261e3cee9c877322a51791ab6f113478170f8a21cd0a971c53b330e999-fb68f5a6.json
new file mode 100644
index 000000000..5cdfadeb1
--- /dev/null
+++ b/tests/integration/inference/recordings/models-e42ca9261e3cee9c877322a51791ab6f113478170f8a21cd0a971c53b330e999-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[llama_stack_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375150,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-cfec81fed838407597a92838017f3ef5",
+              "object": "model_permission",
+              "created": 1762375150,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-e5255919e39635597ad57c723896f9d258abaad9908b22ccd03c126ce597a5db-fb68f5a6.json b/tests/integration/inference/recordings/models-e5255919e39635597ad57c723896f9d258abaad9908b22ccd03c126ce597a5db-fb68f5a6.json
new file mode 100644
index 000000000..82c9665de
--- /dev/null
+++ b/tests/integration/inference/recordings/models-e5255919e39635597ad57c723896f9d258abaad9908b22ccd03c126ce597a5db-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_encoding_format_float[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374466,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-8811d359d9724f8cac7fd6df608f69bd",
+              "object": "model_permission",
+              "created": 1762374466,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-e6664ff0c07b13aa2af6a85925f3841eef3907bc4a55f8bc352a8c960e782ada-fb68f5a6.json b/tests/integration/inference/recordings/models-e6664ff0c07b13aa2af6a85925f3841eef3907bc4a55f8bc352a8c960e782ada-fb68f5a6.json
new file mode 100644
index 000000000..f83a6dea7
--- /dev/null
+++ b/tests/integration/inference/recordings/models-e6664ff0c07b13aa2af6a85925f3841eef3907bc4a55f8bc352a8c960e782ada-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_with_dimensions[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374482,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-d4b4923adfdf40b7bd7698aa798e68eb",
+              "object": "model_permission",
+              "created": 1762374482,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-edbd3344609a0fa1e97f75ede14a094a34db0dd6cb52975abae9f6e7832c6760-fb68f5a6.json b/tests/integration/inference/recordings/models-edbd3344609a0fa1e97f75ede14a094a34db0dd6cb52975abae9f6e7832c6760-fb68f5a6.json
new file mode 100644
index 000000000..9084b4b59
--- /dev/null
+++ b/tests/integration/inference/recordings/models-edbd3344609a0fa1e97f75ede14a094a34db0dd6cb52975abae9f6e7832c6760-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-False]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375291,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-a48cfd65bcd847d7aea01d44e8add51e",
+              "object": "model_permission",
+              "created": 1762375291,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-f6a9f5d7181cf078717443564e4de54e08845224d96b9c8150fb5cfda2068e82-fb68f5a6.json b/tests/integration/inference/recordings/models-f6a9f5d7181cf078717443564e4de54e08845224d96b9c8150fb5cfda2068e82-fb68f5a6.json
new file mode 100644
index 000000000..9791dd1f7
--- /dev/null
+++ b/tests/integration/inference/recordings/models-f6a9f5d7181cf078717443564e4de54e08845224d96b9c8150fb5cfda2068e82-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_embeddings.py::test_openai_embeddings_empty_list_error[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762374517,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-916d53706b624fefb83e5dcc699e7a69",
+              "object": "model_permission",
+              "created": 1762374517,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/inference/recordings/models-f936269fe152d95db3fb80fb10482e3cc79cfd6a28ebdf1a7a8b220ba2de641b-fb68f5a6.json b/tests/integration/inference/recordings/models-f936269fe152d95db3fb80fb10482e3cc79cfd6a28ebdf1a7a8b220ba2de641b-fb68f5a6.json
new file mode 100644
index 000000000..c561e0df0
--- /dev/null
+++ b/tests/integration/inference/recordings/models-f936269fe152d95db3fb80fb10482e3cc79cfd6a28ebdf1a7a8b220ba2de641b-fb68f5a6.json
@@ -0,0 +1,45 @@
+{
+  "test_id": "tests/integration/inference/test_openai_completion.py::test_inference_store[client_with_models-txt=vllm/Qwen/Qwen3-0.6B-False]",
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:8000/v1/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "Qwen/Qwen3-0.6B",
+          "created": 1762375285,
+          "object": "model",
+          "owned_by": "vllm",
+          "root": "/root/.cache/Qwen3-0.6B",
+          "parent": null,
+          "max_model_len": 8192,
+          "permission": [
+            {
+              "id": "modelperm-e0640be42b814b3394545ebe92d844b3",
+              "object": "model_permission",
+              "created": 1762375285,
+              "allow_create_engine": false,
+              "allow_sampling": true,
+              "allow_logprobs": true,
+              "allow_search_indices": false,
+              "allow_view": true,
+              "allow_fine_tuning": false,
+              "organization": "*",
+              "group": null,
+              "is_blocking": false
+            }
+          ]
+        }
+      }
+    ],
+    "is_streaming": false
+  },
+  "id_normalization_mapping": {}
+}
diff --git a/tests/integration/suites.py b/tests/integration/suites.py
index e1fb6a1c7..0cec66afe 100644
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@@ -78,7 +78,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
             "VLLM_URL": "http://localhost:8000/v1",
         },
         defaults={
-            "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct",
+            "text_model": "vllm/Qwen/Qwen3-0.6B",
             "embedding_model": "sentence-transformers/nomic-embed-text-v1.5",
         },
     ),
@@ -169,6 +169,11 @@ SUITE_DEFINITIONS: dict[str, Suite] = {
         roots=base_roots,
         default_setup="ollama",
     ),
+    "base-vllm-subset": Suite(
+        name="base-vllm-subset",
+        roots=["tests/integration/inference"],
+        default_setup="vllm",
+    ),
     "responses": Suite(
         name="responses",
         roots=["tests/integration/responses"],
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index 9da5dd25b..97ce4abe8 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -350,7 +350,7 @@ def test_openai_vector_store_search_empty(
     assert search_response is not None
     assert hasattr(search_response, "data")
     assert len(search_response.data) == 0  # Empty store should return no results
-    assert search_response.search_query == "test query"
+    assert search_response.search_query == ["test query"]
     assert search_response.has_more is False
 
 
@@ -679,7 +679,7 @@ def test_openai_vector_store_attach_file(
     assert file_attach_response.id == file.id
     assert file_attach_response.vector_store_id == vector_store.id
     assert file_attach_response.status == "completed"
-    assert file_attach_response.chunking_strategy.type == "auto"
+    assert file_attach_response.chunking_strategy.type == "static"
     assert file_attach_response.created_at > 0
     assert not file_attach_response.last_error
 
@@ -815,8 +815,8 @@ def test_openai_vector_store_list_files(
     assert set(file_ids) == {file.id for file in files_list.data}
     assert files_list.data[0].object == "vector_store.file"
     assert files_list.data[0].vector_store_id == vector_store.id
-    assert files_list.data[0].status == "completed"
-    assert files_list.data[0].chunking_strategy.type == "auto"
+    assert files_list.data[0].status in ["completed", "in_progress"]
+    assert files_list.data[0].chunking_strategy.type == "static"
     assert files_list.data[0].created_at > 0
     assert files_list.first_id == files_list.data[0].id
     assert not files_list.data[0].last_error
@@ -825,7 +825,7 @@ def test_openai_vector_store_list_files(
     assert first_page.has_more
     assert len(first_page.data) == 2
     assert first_page.first_id == first_page.data[0].id
-    assert first_page.last_id != first_page.data[-1].id
+    assert first_page.last_id == first_page.data[-1].id
 
     next_page = compat_client.vector_stores.files.list(
         vector_store_id=vector_store.id, limit=2, after=first_page.data[-1].id