Merge remote-tracking branch 'upstream/main' into feat/gunicorn-production-server

Roy Belio 2025-10-30 16:43:34 +02:00
commit b060f73e6d
70 changed files with 46290 additions and 1133 deletions


@@ -43,6 +43,9 @@ jobs:
           cache: 'npm'
           cache-dependency-path: 'src/llama_stack/ui/'
+      - name: Set up uv
+        uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
       - name: Install npm dependencies
         run: npm ci
         working-directory: src/llama_stack/ui
@@ -52,7 +55,7 @@ jobs:
         uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
         continue-on-error: true
         env:
-          SKIP: no-commit-to-branch
+          SKIP: no-commit-to-branch,mypy
           RUFF_OUTPUT_FORMAT: github
       - name: Check pre-commit results
@@ -109,3 +112,16 @@ jobs:
           echo "$unstaged_files"
           exit 1
         fi
+      - name: Sync dev + type_checking dependencies
+        run: uv sync --group dev --group type_checking
+      - name: Run mypy (full type_checking)
+        run: |
+          set +e
+          uv run --group dev --group type_checking mypy
+          status=$?
+          if [ $status -ne 0 ]; then
+            echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
+          fi
+          exit $status

.gitignore

@@ -32,3 +32,6 @@ CLAUDE.md
 docs/.docusaurus/
 docs/node_modules/
 docs/static/imported-files/
+docs/docs/api-deprecated/
+docs/docs/api-experimental/
+docs/docs/api/


@@ -57,17 +57,27 @@ repos:
     hooks:
       - id: uv-lock
-  - repo: local
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.18.2
     hooks:
       - id: mypy
-        name: mypy
         additional_dependencies:
-          - uv==0.7.8
-        entry: uv run --group dev --group type_checking mypy
-        language: python
-        types: [python]
+          - uv==0.6.2
+          - pytest
+          - rich
+          - types-requests
+          - pydantic
+          - httpx
         pass_filenames: false
-        require_serial: true
+  - repo: local
+    hooks:
+      - id: mypy-full
+        name: mypy (full type_checking)
+        entry: uv run --group dev --group type_checking mypy
+        language: system
+        pass_filenames: false
+        stages: [manual]
   # - repo: https://github.com/tcort/markdown-link-check
   #   rev: v3.11.2
@@ -152,7 +162,6 @@ repos:
         files: ^src/llama_stack/ui/.*\.(ts|tsx)$
         pass_filenames: false
         require_serial: true
       - id: check-log-usage
         name: Ensure 'llama_stack.log' usage for logging
         entry: bash
@@ -171,7 +180,23 @@ repos:
             exit 1
           fi
           exit 0
+      - id: fips-compliance
+        name: Ensure llama-stack remains FIPS compliant
+        entry: bash
+        language: system
+        types: [python]
+        pass_filenames: true
+        exclude: '^tests/.*$' # Exclude test dir as some safety tests used MD5
+        args:
+          - -c
+          - |
+            grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' "$@" && {
+              echo;
+              echo "❌ Do not use any of the following functions: hashlib.md5, hashlib.sha1, uuid.uuid3, uuid.uuid5"
+              echo "   These functions are not FIPS-compliant"
+              echo;
+              exit 1;
+            } || true
 ci:
   autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
   autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
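
For reference, a minimal sketch of replacements the new fips-compliance hook above would accept, assuming SHA-256 digests and random UUIDs are acceptable substitutes at the call sites:

```python
import hashlib
import uuid

# hashlib.md5 and hashlib.sha1 trip the hook; SHA-256 is FIPS-approved.
digest = hashlib.sha256(b"payload").hexdigest()

# uuid.uuid3 (MD5-based) and uuid.uuid5 (SHA-1-based) are banned;
# uuid.uuid4 is random and uses neither digest.
identifier = str(uuid.uuid4())

print(digest, identifier)
```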


@@ -61,6 +61,18 @@ uv run pre-commit run --all-files -v
 The `-v` (verbose) parameter is optional but often helpful for getting more information about any issues that the pre-commit checks identify.
+
+To run the expanded mypy configuration that CI enforces, use:
+
+```bash
+uv run pre-commit run mypy-full --hook-stage manual --all-files
+```
+
+or invoke mypy directly with all optional dependencies:
+
+```bash
+uv run --group dev --group type_checking mypy
+```
+
 ```{caution}
 Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
 ```


@@ -1,610 +0,0 @@
# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json
organization:
# Name of your organization or company, used to determine the name of the client
# and headings.
name: llama-stack-client
docs: https://llama-stack.readthedocs.io/en/latest/
contact: llamastack@meta.com
security:
- {}
- BearerAuth: []
security_schemes:
BearerAuth:
type: http
scheme: bearer
# `targets` define the output targets and their customization options, such as
# whether to emit the Node SDK and what its package name should be.
targets:
node:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-typescript
publish:
npm: false
python:
package_name: llama_stack_client
production_repo: llamastack/llama-stack-client-python
options:
use_uv: true
publish:
pypi: true
project_name: llama_stack_client
kotlin:
reverse_domain: com.llama_stack_client.api
production_repo: null
publish:
maven: false
go:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-go
options:
enable_v2: true
back_compat_use_shared_package: false
# `client_settings` define settings for the API client, such as extra constructor
# arguments (used for authentication), retry behavior, idempotency, etc.
client_settings:
default_env_prefix: LLAMA_STACK_CLIENT
opts:
api_key:
type: string
read_env: LLAMA_STACK_CLIENT_API_KEY
auth: { security_scheme: BearerAuth }
nullable: true
# `environments` are a map of the name of the environment (e.g. "sandbox",
# "production") to the corresponding url to use.
environments:
production: http://any-hosted-llama-stack.com
# `pagination` defines [pagination schemes] which provides a template to match
# endpoints and generate next-page and auto-pagination helpers in the SDKs.
pagination:
- name: datasets_iterrows
type: offset
request:
dataset_id:
type: string
start_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_param
limit:
type: integer
response:
data:
type: array
items:
type: object
next_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_start_field
- name: openai_cursor_page
type: cursor
request:
limit:
type: integer
after:
type: string
x-stainless-pagination-property:
purpose: next_cursor_param
response:
data:
type: array
items: {}
has_more:
type: boolean
last_id:
type: string
x-stainless-pagination-property:
purpose: next_cursor_field
# `resources` define the structure and organization for your API, such as how
# methods and models are grouped together and accessed. See the [configuration
# guide] for more information.
#
# [configuration guide]:
# https://app.stainlessapi.com/docs/guides/configure#resources
resources:
$shared:
models:
agent_config: AgentConfig
interleaved_content_item: InterleavedContentItem
interleaved_content: InterleavedContent
param_type: ParamType
safety_violation: SafetyViolation
sampling_params: SamplingParams
scoring_result: ScoringResult
message: Message
user_message: UserMessage
completion_message: CompletionMessage
tool_response_message: ToolResponseMessage
system_message: SystemMessage
tool_call: ToolCall
query_result: RAGQueryResult
document: RAGDocument
query_config: RAGQueryConfig
response_format: ResponseFormat
toolgroups:
models:
tool_group: ToolGroup
list_tool_groups_response: ListToolGroupsResponse
methods:
register: post /v1/toolgroups
get: get /v1/toolgroups/{toolgroup_id}
list: get /v1/toolgroups
unregister: delete /v1/toolgroups/{toolgroup_id}
tools:
methods:
get: get /v1/tools/{tool_name}
list:
endpoint: get /v1/tools
paginated: false
tool_runtime:
models:
tool_def: ToolDef
tool_invocation_result: ToolInvocationResult
methods:
list_tools:
endpoint: get /v1/tool-runtime/list-tools
paginated: false
invoke_tool: post /v1/tool-runtime/invoke
subresources:
rag_tool:
methods:
insert: post /v1/tool-runtime/rag-tool/insert
query: post /v1/tool-runtime/rag-tool/query
responses:
models:
response_object_stream: OpenAIResponseObjectStream
response_object: OpenAIResponseObject
methods:
create:
type: http
endpoint: post /v1/responses
streaming:
stream_event_model: responses.response_object_stream
param_discriminator: stream
retrieve: get /v1/responses/{response_id}
list:
type: http
endpoint: get /v1/responses
delete:
type: http
endpoint: delete /v1/responses/{response_id}
subresources:
input_items:
methods:
list:
type: http
endpoint: get /v1/responses/{response_id}/input_items
conversations:
models:
conversation_object: Conversation
methods:
create:
type: http
endpoint: post /v1/conversations
retrieve: get /v1/conversations/{conversation_id}
update:
type: http
endpoint: post /v1/conversations/{conversation_id}
delete:
type: http
endpoint: delete /v1/conversations/{conversation_id}
subresources:
items:
methods:
get:
type: http
endpoint: get /v1/conversations/{conversation_id}/items/{item_id}
list:
type: http
endpoint: get /v1/conversations/{conversation_id}/items
create:
type: http
endpoint: post /v1/conversations/{conversation_id}/items
inspect:
models:
healthInfo: HealthInfo
providerInfo: ProviderInfo
routeInfo: RouteInfo
versionInfo: VersionInfo
methods:
health: get /v1/health
version: get /v1/version
embeddings:
models:
create_embeddings_response: OpenAIEmbeddingsResponse
methods:
create: post /v1/embeddings
chat:
models:
chat_completion_chunk: OpenAIChatCompletionChunk
subresources:
completions:
methods:
create:
type: http
endpoint: post /v1/chat/completions
streaming:
stream_event_model: chat.chat_completion_chunk
param_discriminator: stream
list:
type: http
endpoint: get /v1/chat/completions
retrieve:
type: http
endpoint: get /v1/chat/completions/{completion_id}
completions:
methods:
create:
type: http
endpoint: post /v1/completions
streaming:
param_discriminator: stream
vector_io:
models:
queryChunksResponse: QueryChunksResponse
methods:
insert: post /v1/vector-io/insert
query: post /v1/vector-io/query
vector_stores:
models:
vector_store: VectorStoreObject
list_vector_stores_response: VectorStoreListResponse
vector_store_delete_response: VectorStoreDeleteResponse
vector_store_search_response: VectorStoreSearchResponsePage
methods:
create: post /v1/vector_stores
list:
endpoint: get /v1/vector_stores
retrieve: get /v1/vector_stores/{vector_store_id}
update: post /v1/vector_stores/{vector_store_id}
delete: delete /v1/vector_stores/{vector_store_id}
search: post /v1/vector_stores/{vector_store_id}/search
subresources:
files:
models:
vector_store_file: VectorStoreFileObject
methods:
list: get /v1/vector_stores/{vector_store_id}/files
retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id}
update: post /v1/vector_stores/{vector_store_id}/files/{file_id}
delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id}
create: post /v1/vector_stores/{vector_store_id}/files
content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content
file_batches:
models:
vector_store_file_batches: VectorStoreFileBatchObject
list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse
methods:
create: post /v1/vector_stores/{vector_store_id}/file_batches
retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
models:
models:
model: Model
list_models_response: ListModelsResponse
methods:
retrieve: get /v1/models/{model_id}
list:
endpoint: get /v1/models
paginated: false
register: post /v1/models
unregister: delete /v1/models/{model_id}
subresources:
openai:
methods:
list:
endpoint: get /v1/models
paginated: false
providers:
models:
list_providers_response: ListProvidersResponse
methods:
list:
endpoint: get /v1/providers
paginated: false
retrieve: get /v1/providers/{provider_id}
routes:
models:
list_routes_response: ListRoutesResponse
methods:
list:
endpoint: get /v1/inspect/routes
paginated: false
moderations:
models:
create_response: ModerationObject
methods:
create: post /v1/moderations
safety:
models:
run_shield_response: RunShieldResponse
methods:
run_shield: post /v1/safety/run-shield
shields:
models:
shield: Shield
list_shields_response: ListShieldsResponse
methods:
retrieve: get /v1/shields/{identifier}
list:
endpoint: get /v1/shields
paginated: false
register: post /v1/shields
delete: delete /v1/shields/{identifier}
synthetic_data_generation:
models:
syntheticDataGenerationResponse: SyntheticDataGenerationResponse
methods:
generate: post /v1/synthetic-data-generation/generate
telemetry:
models:
span_with_status: SpanWithStatus
trace: Trace
query_spans_response: QuerySpansResponse
event: Event
query_condition: QueryCondition
methods:
query_traces:
endpoint: post /v1alpha/telemetry/traces
skip_test_reason: 'unsupported query params in java / kotlin'
get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree
query_spans:
endpoint: post /v1alpha/telemetry/spans
skip_test_reason: 'unsupported query params in java / kotlin'
query_metrics:
endpoint: post /v1alpha/telemetry/metrics/{metric_name}
skip_test_reason: 'unsupported query params in java / kotlin'
# log_event: post /v1alpha/telemetry/events
save_spans_to_dataset: post /v1alpha/telemetry/spans/export
get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}
get_trace: get /v1alpha/telemetry/traces/{trace_id}
scoring:
methods:
score: post /v1/scoring/score
score_batch: post /v1/scoring/score-batch
scoring_functions:
methods:
retrieve: get /v1/scoring-functions/{scoring_fn_id}
list:
endpoint: get /v1/scoring-functions
paginated: false
register: post /v1/scoring-functions
models:
scoring_fn: ScoringFn
scoring_fn_params: ScoringFnParams
list_scoring_functions_response: ListScoringFunctionsResponse
benchmarks:
methods:
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
list:
endpoint: get /v1alpha/eval/benchmarks
paginated: false
register: post /v1alpha/eval/benchmarks
models:
benchmark: Benchmark
list_benchmarks_response: ListBenchmarksResponse
files:
methods:
create: post /v1/files
list: get /v1/files
retrieve: get /v1/files/{file_id}
delete: delete /v1/files/{file_id}
content: get /v1/files/{file_id}/content
models:
file: OpenAIFileObject
list_files_response: ListOpenAIFileResponse
delete_file_response: OpenAIFileDeleteResponse
alpha:
subresources:
inference:
methods:
rerank: post /v1alpha/inference/rerank
post_training:
models:
algorithm_config: AlgorithmConfig
post_training_job: PostTrainingJob
list_post_training_jobs_response: ListPostTrainingJobsResponse
methods:
preference_optimize: post /v1alpha/post-training/preference-optimize
supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune
subresources:
job:
methods:
artifacts: get /v1alpha/post-training/job/artifacts
cancel: post /v1alpha/post-training/job/cancel
status: get /v1alpha/post-training/job/status
list:
endpoint: get /v1alpha/post-training/jobs
paginated: false
eval:
methods:
evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
subresources:
jobs:
methods:
cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
models:
evaluate_response: EvaluateResponse
benchmark_config: BenchmarkConfig
job: Job
agents:
methods:
create: post /v1alpha/agents
list: get /v1alpha/agents
retrieve: get /v1alpha/agents/{agent_id}
delete: delete /v1alpha/agents/{agent_id}
models:
inference_step: InferenceStep
tool_execution_step: ToolExecutionStep
tool_response: ToolResponse
shield_call_step: ShieldCallStep
memory_retrieval_step: MemoryRetrievalStep
subresources:
session:
models:
session: Session
methods:
list: get /v1alpha/agents/{agent_id}/sessions
create: post /v1alpha/agents/{agent_id}/session
delete: delete /v1alpha/agents/{agent_id}/session/{session_id}
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}
steps:
methods:
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
turn:
models:
turn: Turn
turn_response_event: AgentTurnResponseEvent
agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
methods:
create:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}
resume:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
beta:
subresources:
datasets:
models:
list_datasets_response: ListDatasetsResponse
methods:
register: post /v1beta/datasets
retrieve: get /v1beta/datasets/{dataset_id}
list:
endpoint: get /v1beta/datasets
paginated: false
unregister: delete /v1beta/datasets/{dataset_id}
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
settings:
license: MIT
unwrap_response_fields: [ data ]
openapi:
transformations:
- command: renameValue
reason: pydantic reserved name
args:
filter:
only:
- '$.components.schemas.InferenceStep.properties.model_response'
rename:
python:
property_name: 'inference_model_response'
# - command: renameValue
# reason: pydantic reserved name
# args:
# filter:
# only:
# - '$.components.schemas.Model.properties.model_type'
# rename:
# python:
# property_name: 'type'
- command: mergeObject
reason: Better return_type using enum
args:
target:
- '$.components.schemas'
object:
ReturnType:
additionalProperties: false
properties:
type:
enum:
- string
- number
- boolean
- array
- object
- json
- union
- chat_completion_input
- completion_input
- agent_turn_input
required:
- type
type: object
- command: replaceProperties
reason: Replace return type properties with better model (see above)
args:
filter:
only:
- '$.components.schemas.ScoringFn.properties.return_type'
- '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type'
value:
$ref: '#/components/schemas/ReturnType'
- command: oneOfToAnyOf
reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
- reason: For better names
command: extractToRefs
args:
ref:
target: '$.components.schemas.ToolCallDelta.properties.tool_call'
name: '#/components/schemas/ToolCallOrString'
# `readme` is used to configure the code snippets that will be rendered in the
# README.md of various SDKs. In particular, you can change the `headline`
# snippet's endpoint and the arguments to call it with.
readme:
example_requests:
default:
type: request
endpoint: post /v1/chat/completions
params: &ref_0 {}
headline:
type: request
endpoint: post /v1/models
params: *ref_0
pagination:
type: request
endpoint: post /v1/chat/completions
params: {}


@@ -15,6 +15,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: true
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
  /v1/chat/completions:
    get:
      responses:
@@ -4212,6 +4347,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
    Order:
      type: string
      enum:
@@ -10258,6 +10718,10 @@ components:
           description: >-
             The content of the chunk, which can be interleaved text, images, or other
             types.
+        chunk_id:
+          type: string
+          description: >-
+            Unique identifier for the chunk. Must be provided explicitly.
         metadata:
           type: object
           additionalProperties:
@@ -10278,10 +10742,6 @@ components:
           description: >-
             Optional embedding for the chunk. If not provided, it will be computed
             later.
-        stored_chunk_id:
-          type: string
-          description: >-
-            The chunk ID that is stored in the vector database. Used for backend functionality.
         chunk_metadata:
           $ref: '#/components/schemas/ChunkMetadata'
           description: >-
@@ -10290,6 +10750,7 @@ components:
       additionalProperties: false
       required:
         - content
+        - chunk_id
         - metadata
       title: Chunk
       description: >-
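
Since `chunk_id` is now a required `Chunk` property (and the server-managed `stored_chunk_id` field is gone), callers must mint an ID before inserting. A hedged sketch using the `llama_stack_client` Python SDK; the base URL and `vector_db_id` are placeholders, and the chunk is a plain dict matching the schema above:

```python
import uuid

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# chunk_id must now be provided explicitly per the updated Chunk schema.
chunk = {
    "chunk_id": str(uuid.uuid4()),  # uuid4: random and FIPS-friendly
    "content": "Llama Stack supports batch processing.",
    "metadata": {"source": "docs"},
}

client.vector_io.insert(vector_db_id="my-vector-db", chunks=[chunk])
```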
@@ -13527,6 +13988,19 @@ tags:
    description: >-
      APIs for creating and interacting with agentic systems.
    x-displayName: Agents
- name: Batches
description: >-
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
  - name: Benchmarks
    description: ''
  - name: Conversations
@@ -13601,6 +14075,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Benchmarks
      - Conversations
      - DatasetIO
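
Since the Batches tag notes the API is designed for use with OpenAI client libraries, the endpoints above can be exercised with the stock `openai` package. A sketch, assuming a Llama Stack server at `http://localhost:8321/v1` and an already-uploaded input file:

```python
from openai import OpenAI

# Point the standard OpenAI client at a Llama Stack deployment (URL assumed).
client = OpenAI(base_url="http://localhost:8321/v1", api_key="not-needed")

# CreateBatchRequest: input_file_id, endpoint, and completion_window ("24h")
# are required; metadata is optional. The llama-stack-specific
# idempotency_key extension rides along in the request body.
batch = client.batches.create(
    input_file_id="file-abc123",  # placeholder ID from a prior file upload
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={"project": "demo"},
)

print(client.batches.retrieve(batch.id).status)  # e.g. "validating"

# List batches using the cursor parameters defined above.
page = client.batches.list(limit=20)
print([b.id for b in page.data])
```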

File diff suppressed because it is too large.


@@ -242,15 +242,6 @@ const sidebars: SidebarsConfig = {
         'providers/eval/remote_nvidia'
       ],
     },
-    {
-      type: 'category',
-      label: 'Telemetry',
-      collapsed: true,
-      items: [
-        'providers/telemetry/index',
-        'providers/telemetry/inline_meta-reference'
-      ],
-    },
     {
       type: 'category',
       label: 'Batches',


@@ -1414,6 +1414,193 @@
        "deprecated": true
      }
    },
"/v1/openai/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": true
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/openai/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/openai/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/openai/v1/chat/completions": { "/v1/openai/v1/chat/completions": {
"get": { "get": {
"responses": { "responses": {
@@ -6401,6 +6588,451 @@
        "title": "Job",
        "description": "A job execution instance with status tracking."
      },
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": { "Order": {
"type": "string", "type": "string",
"enum": [ "enum": [
@@ -13505,6 +14137,11 @@
      "description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n",
      "x-displayName": "Agents"
    },
{
"name": "Batches",
"description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
"x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
},
    {
      "name": "Benchmarks",
      "description": ""
@@ -13555,6 +14192,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
        "Batches",
        "Benchmarks",
        "DatasetIO",
        "Datasets",


@@ -1012,6 +1012,141 @@ paths:
        schema:
          type: string
      deprecated: true
/v1/openai/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: true
schema:
type: integer
deprecated: true
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: true
/v1/openai/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: true
/v1/openai/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: true
  /v1/openai/v1/chat/completions:
    get:
      responses:
@@ -4736,6 +4871,331 @@ components:
      title: Job
      description: >-
        A job execution instance with status tracking.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
    Order:
      type: string
      enum:
@@ -10263,6 +10723,19 @@ tags:
      - **Responses API**: Use the stable v1 Responses API endpoints
    x-displayName: Agents
- name: Batches
description: >-
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
  - name: Benchmarks
    description: ''
  - name: DatasetIO
@@ -10308,6 +10781,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Benchmarks
      - DatasetIO
      - Datasets


@@ -40,6 +40,193 @@
    }
  ],
  "paths": {
"/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": false
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/chat/completions": { "/v1/chat/completions": {
"get": { "get": {
"responses": { "responses": {
@@ -4005,6 +4192,451 @@
        "title": "Error",
        "description": "Error response from the API. Roughly follows RFC 7807."
      },
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": { "Order": {
"type": "string", "type": "string",
"enum": [ "enum": [
@ -11897,6 +12529,10 @@
"$ref": "#/components/schemas/InterleavedContent", "$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the chunk, which can be interleaved text, images, or other types." "description": "The content of the chunk, which can be interleaved text, images, or other types."
}, },
"chunk_id": {
"type": "string",
"description": "Unique identifier for the chunk. Must be provided explicitly."
},
"metadata": { "metadata": {
"type": "object", "type": "object",
"additionalProperties": { "additionalProperties": {
@ -11930,10 +12566,6 @@
}, },
"description": "Optional embedding for the chunk. If not provided, it will be computed later." "description": "Optional embedding for the chunk. If not provided, it will be computed later."
}, },
"stored_chunk_id": {
"type": "string",
"description": "The chunk ID that is stored in the vector database. Used for backend functionality."
},
"chunk_metadata": { "chunk_metadata": {
"$ref": "#/components/schemas/ChunkMetadata", "$ref": "#/components/schemas/ChunkMetadata",
"description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@ -11942,6 +12574,7 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"content", "content",
"chunk_id",
"metadata" "metadata"
], ],
"title": "Chunk", "title": "Chunk",
@ -13288,6 +13921,11 @@
"description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n - Supports dynamic `vector_store_ids` per call\n - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`", "description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n - Supports dynamic `vector_store_ids` per call\n - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
"x-displayName": "Agents" "x-displayName": "Agents"
}, },
    {
      "name": "Batches",
      "description": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale.\n\nThe API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
      "x-displayName": "Batches"
    },
    {
      "name": "Conversations",
      "description": "Protocol for conversation management operations.",
@ -13361,6 +13999,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
        "Batches",
        "Conversations",
        "Files",
        "Inference",
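The Batches tag above advertises OpenAI client compatibility, so a minimal sketch of driving these endpoints with the openai Python client may help. The base_url, api_key, and requests.jsonl file are assumptions; idempotency_key is the Llama Stack extension described in CreateBatchRequest and is passed through extra_body because the stock client does not model it.

# Sketch only: assumed local server URL and input file.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Upload a JSONL file of requests, then POST /v1/batches.
batch_file = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")
batch = client.batches.create(
    input_file_id=batch_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",  # the only value the schema allows (const: 24h)
    metadata={"job": "nightly-eval"},
    extra_body={"idempotency_key": "nightly-eval-2025-10-30"},  # Llama Stack extension
)

# GET /v1/batches/{batch_id} to poll status; GET /v1/batches to paginate.
batch = client.batches.retrieve(batch.id)
page = client.batches.list(limit=20)  # cursor pagination via after/limit

# POST /v1/batches/{batch_id}/cancel while the batch is still in progress.
if batch.status in ("validating", "in_progress"):
    client.batches.cancel(batch.id)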
@ -12,6 +12,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
        required: false
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
  /v1/chat/completions:
    get:
      responses:
@ -2999,6 +3134,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
    Order:
      type: string
      enum:
@ -9045,6 +9505,10 @@ components:
          description: >-
            The content of the chunk, which can be interleaved text, images, or other
            types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
        metadata:
          type: object
          additionalProperties:
@ -9065,10 +9529,6 @@ components:
          description: >-
            Optional embedding for the chunk. If not provided, it will be computed
            later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
          description: >-
@ -9077,6 +9537,7 @@ components:
      additionalProperties: false
      required:
        - content
        - chunk_id
        - metadata
      title: Chunk
      description: >-
@ -10143,6 +10604,19 @@ tags:
      - `background`
    x-displayName: Agents
  - name: Batches
    description: >-
      The Batches API enables efficient processing of multiple requests in a single
      operation, particularly useful for processing large datasets, batch evaluation
      workflows, and cost-effective inference at scale.

      The API is designed to allow use of openai client libraries for seamless integration.

      This API provides the following extensions:
       - idempotent batch creation

      Note: This API is currently under active development and may undergo changes.
    x-displayName: Batches
  - name: Conversations
    description: >-
      Protocol for conversation management operations.
@ -10205,6 +10679,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Conversations
      - Files
      - Inference
@ -40,6 +40,193 @@
    }
  ],
  "paths": {
"/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": false
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/chat/completions": { "/v1/chat/completions": {
"get": { "get": {
"responses": { "responses": {
@ -5677,6 +5864,451 @@
"title": "Error", "title": "Error",
"description": "Error response from the API. Roughly follows RFC 7807." "description": "Error response from the API. Roughly follows RFC 7807."
}, },
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": { "Order": {
"type": "string", "type": "string",
"enum": [ "enum": [
@ -13569,6 +14201,10 @@
"$ref": "#/components/schemas/InterleavedContent", "$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the chunk, which can be interleaved text, images, or other types." "description": "The content of the chunk, which can be interleaved text, images, or other types."
}, },
"chunk_id": {
"type": "string",
"description": "Unique identifier for the chunk. Must be provided explicitly."
},
"metadata": { "metadata": {
"type": "object", "type": "object",
"additionalProperties": { "additionalProperties": {
@ -13602,10 +14238,6 @@
}, },
"description": "Optional embedding for the chunk. If not provided, it will be computed later." "description": "Optional embedding for the chunk. If not provided, it will be computed later."
}, },
"stored_chunk_id": {
"type": "string",
"description": "The chunk ID that is stored in the vector database. Used for backend functionality."
},
"chunk_metadata": { "chunk_metadata": {
"$ref": "#/components/schemas/ChunkMetadata", "$ref": "#/components/schemas/ChunkMetadata",
"description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@ -13614,6 +14246,7 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"content", "content",
"chunk_id",
"metadata" "metadata"
], ],
"title": "Chunk", "title": "Chunk",
@ -17960,6 +18593,11 @@
"description": "APIs for creating and interacting with agentic systems.", "description": "APIs for creating and interacting with agentic systems.",
"x-displayName": "Agents" "x-displayName": "Agents"
}, },
    {
      "name": "Batches",
      "description": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale.\n\nThe API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
      "x-displayName": "Batches"
    },
    {
      "name": "Benchmarks",
      "description": ""
@ -18054,6 +18692,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
        "Batches",
        "Benchmarks",
        "Conversations",
        "DatasetIO",
@ -15,6 +15,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
        required: false
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
  /v1/chat/completions:
    get:
      responses:
@ -4212,6 +4347,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
    Order:
      type: string
      enum:
@ -10258,6 +10718,10 @@ components:
          description: >-
            The content of the chunk, which can be interleaved text, images, or other
            types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
        metadata:
          type: object
          additionalProperties:
@ -10278,10 +10742,6 @@ components:
          description: >-
            Optional embedding for the chunk. If not provided, it will be computed
            later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
          description: >-
@ -10290,6 +10750,7 @@ components:
      additionalProperties: false
      required:
        - content
        - chunk_id
        - metadata
      title: Chunk
      description: >-
@ -13527,6 +13988,19 @@ tags:
    description: >-
      APIs for creating and interacting with agentic systems.
    x-displayName: Agents
  - name: Batches
    description: >-
      The Batches API enables efficient processing of multiple requests in a single
      operation, particularly useful for processing large datasets, batch evaluation
      workflows, and cost-effective inference at scale.

      The API is designed to allow use of openai client libraries for seamless integration.

      This API provides the following extensions:
       - idempotent batch creation

      Note: This API is currently under active development and may undergo changes.
    x-displayName: Batches
  - name: Benchmarks
    description: ''
  - name: Conversations
@ -13601,6 +14075,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Benchmarks
      - Conversations
      - DatasetIO
@ -285,7 +285,6 @@ exclude = [
"^src/llama_stack/models/llama/llama3/interface\\.py$", "^src/llama_stack/models/llama/llama3/interface\\.py$",
"^src/llama_stack/models/llama/llama3/tokenizer\\.py$", "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
"^src/llama_stack/models/llama/llama3/tool_utils\\.py$", "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
"^src/llama_stack/providers/inline/agents/meta_reference/",
"^src/llama_stack/providers/inline/datasetio/localfs/", "^src/llama_stack/providers/inline/datasetio/localfs/",
"^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$", "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
"^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$", "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
@ -313,8 +313,20 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
fi
echo "Using image: $IMAGE_NAME"
docker run -d --network host --name "$container_name" \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
# On macOS/Darwin, --network host doesn't work as expected due to Docker running in a VM
# Use regular port mapping instead
NETWORK_MODE=""
PORT_MAPPINGS=""
if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
NETWORK_MODE="--network host"
else
# On non-Linux (macOS, Windows), need explicit port mappings for both app and telemetry
PORT_MAPPINGS="-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT -p $COLLECTOR_PORT:$COLLECTOR_PORT"
echo "Using bridge networking with port mapping (non-Linux)"
fi
docker run -d $NETWORK_MODE --name "$container_name" \
$PORT_MAPPINGS \
  $DOCKER_ENV_VARS \
  "$IMAGE_NAME" \
  --port $LLAMA_STACK_PORT
@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections.abc import Sequence
from typing import Annotated, Any, Literal

from pydantic import BaseModel, Field, model_validator
@ -202,7 +203,7 @@ class OpenAIResponseMessage(BaseModel):
    scenarios.
    """

    content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]
    content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
    role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
    type: Literal["message"] = "message"
@ -254,10 +255,10 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
""" """
id: str id: str
queries: list[str] queries: Sequence[str]
status: str status: str
type: Literal["file_search_call"] = "file_search_call" type: Literal["file_search_call"] = "file_search_call"
results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
@json_schema_type @json_schema_type
@ -597,7 +598,7 @@ class OpenAIResponseObject(BaseModel):
    id: str
    model: str
    object: Literal["response"] = "response"
    output: list[OpenAIResponseOutput]
    output: Sequence[OpenAIResponseOutput]
    parallel_tool_calls: bool = False
    previous_response_id: str | None = None
    prompt: OpenAIResponsePrompt | None = None
@ -607,7 +608,7 @@ class OpenAIResponseObject(BaseModel):
    # before the field was added. New responses will have this set always.
    text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
    top_p: float | None = None
    tools: list[OpenAIResponseTool] | None = None
    tools: Sequence[OpenAIResponseTool] | None = None
    truncation: str | None = None
    usage: OpenAIResponseUsage | None = None
    instructions: str | None = None
@ -1315,7 +1316,7 @@ class ListOpenAIResponseInputItem(BaseModel):
    :param object: Object type identifier, always "list"
    """

    data: list[OpenAIResponseInput]
    data: Sequence[OpenAIResponseInput]
    object: Literal["list"] = "list"
@ -1326,7 +1327,7 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
    :param input: List of input items that led to this response
    """

    input: list[OpenAIResponseInput]
    input: Sequence[OpenAIResponseInput]

    def to_response_object(self) -> OpenAIResponseObject:
        """Convert to OpenAIResponseObject by excluding input field."""
@ -1344,7 +1345,7 @@ class ListOpenAIResponseObject(BaseModel):
    :param object: Object type identifier, always "list"
    """

    data: list[OpenAIResponseObjectWithInput]
    data: Sequence[OpenAIResponseObjectWithInput]
    has_more: bool
    first_id: str
    last_id: str
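A minimal sketch of why these annotations move from list to Sequence: under mypy, list is invariant while Sequence is covariant, so callers may pass collections of subtypes (or read-only views) without a type error. Base, Sub, and the function names below are illustrative, not from the source.

from collections.abc import Sequence


class Base: ...


class Sub(Base): ...


def takes_list(items: list[Base]) -> None: ...


def takes_seq(items: Sequence[Base]) -> None: ...


subs: list[Sub] = [Sub()]
takes_seq(subs)   # OK: Sequence is covariant, so Sequence[Base] accepts list[Sub]
takes_list(subs)  # mypy error: list is invariant, so list[Base] rejects list[Sub]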
@ -8,7 +8,6 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import uuid
from typing import Annotated, Any, Literal, Protocol, runtime_checkable

from fastapi import Body
@ -18,7 +17,6 @@ from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.core.telemetry.trace_protocol import trace_protocol
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
from llama_stack.schema_utils import json_schema_type, webmethod
from llama_stack.strong_typing.schema import register_schema
@ -61,38 +59,19 @@ class Chunk(BaseModel):
""" """
A chunk of content that can be inserted into a vector database. A chunk of content that can be inserted into a vector database.
:param content: The content of the chunk, which can be interleaved text, images, or other types. :param content: The content of the chunk, which can be interleaved text, images, or other types.
:param embedding: Optional embedding for the chunk. If not provided, it will be computed later. :param chunk_id: Unique identifier for the chunk. Must be provided explicitly.
:param metadata: Metadata associated with the chunk that will be used in the model context during inference. :param metadata: Metadata associated with the chunk that will be used in the model context during inference.
:param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality. :param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
:param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference. :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
The `chunk_metadata` is required backend functionality. The `chunk_metadata` is required backend functionality.
""" """
content: InterleavedContent content: InterleavedContent
chunk_id: str
metadata: dict[str, Any] = Field(default_factory=dict) metadata: dict[str, Any] = Field(default_factory=dict)
embedding: list[float] | None = None embedding: list[float] | None = None
# The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id"
stored_chunk_id: str | None = Field(default=None, alias="chunk_id")
chunk_metadata: ChunkMetadata | None = None chunk_metadata: ChunkMetadata | None = None
model_config = {"populate_by_name": True}
def model_post_init(self, __context):
# Extract chunk_id from metadata if present
if self.metadata and "chunk_id" in self.metadata:
self.stored_chunk_id = self.metadata.pop("chunk_id")
@property
def chunk_id(self) -> str:
"""Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set."""
if self.stored_chunk_id:
return self.stored_chunk_id
if "document_id" in self.metadata:
return generate_chunk_id(self.metadata["document_id"], str(self.content))
return generate_chunk_id(str(uuid.uuid4()), str(self.content))
    @property
    def document_id(self) -> str | None:
        """Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence."""
@ -13,6 +13,8 @@ from llama_stack.core.datatypes import (
    ModelWithOwner,
    RegistryEntrySource,
)
from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger

from .common import CommonRoutingTableImpl, lookup_model
@ -42,11 +44,90 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
        await self.update_registered_models(provider_id, models)
async def _get_dynamic_models_from_provider_data(self) -> list[Model]:
"""
Fetch models from providers that have credentials in the current request's provider_data.
This allows users to see models available to them from providers that require
per-request API keys (via X-LlamaStack-Provider-Data header).
Returns models with fully qualified identifiers (provider_id/model_id) but does NOT
cache them in the registry since they are user-specific.
"""
provider_data = PROVIDER_DATA_VAR.get()
if not provider_data:
return []
dynamic_models = []
for provider_id, provider in self.impls_by_provider_id.items():
# Check if this provider supports provider_data
if not isinstance(provider, NeedsRequestProviderData):
continue
# Check if provider has a validator (some providers like ollama don't need per-request credentials)
spec = getattr(provider, "__provider_spec__", None)
if not spec or not getattr(spec, "provider_data_validator", None):
continue
# Validate provider_data silently - we're speculatively checking all providers
# so validation failures are expected when user didn't provide keys for this provider
try:
validator = instantiate_class_type(spec.provider_data_validator)
validator(**provider_data)
except Exception:
# User didn't provide credentials for this provider - skip silently
continue
# Validation succeeded! User has credentials for this provider
# Now try to list models
try:
models = await provider.list_models()
if not models:
continue
# Ensure models have fully qualified identifiers with provider_id prefix
for model in models:
# Only add prefix if model identifier doesn't already have it
if not model.identifier.startswith(f"{provider_id}/"):
model.identifier = f"{provider_id}/{model.provider_resource_id}"
dynamic_models.append(model)
logger.debug(f"Fetched {len(models)} models from provider {provider_id} using provider_data")
except Exception as e:
logger.debug(f"Failed to list models from provider {provider_id} with provider_data: {e}")
continue
return dynamic_models
    async def list_models(self) -> ListModelsResponse:
        return ListModelsResponse(data=await self.get_all_with_type("model"))
        # Get models from registry
        registry_models = await self.get_all_with_type("model")
registry_models = await self.get_all_with_type("model")
# Get additional models available via provider_data (user-specific, not cached)
dynamic_models = await self._get_dynamic_models_from_provider_data()
# Combine, avoiding duplicates (registry takes precedence)
registry_identifiers = {m.identifier for m in registry_models}
unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]
return ListModelsResponse(data=registry_models + unique_dynamic_models)
    async def openai_list_models(self) -> OpenAIListModelsResponse:
        models = await self.get_all_with_type("model")
        # Get models from registry
        registry_models = await self.get_all_with_type("model")
registry_models = await self.get_all_with_type("model")
# Get additional models available via provider_data (user-specific, not cached)
dynamic_models = await self._get_dynamic_models_from_provider_data()
# Combine, avoiding duplicates (registry takes precedence)
registry_identifiers = {m.identifier for m in registry_models}
unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]
all_models = registry_models + unique_dynamic_models
        openai_models = [
            OpenAIModel(
                id=model.identifier,
@ -54,7 +135,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
                created=int(time.time()),
                owned_by="llama_stack",
            )
            for model in models
            for model in all_models
        ]
        return OpenAIListModelsResponse(data=openai_models)
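A sketch of how this new path is exercised: per-request credentials arrive in the X-LlamaStack-Provider-Data header, validation selects the matching providers, and their models are merged into the list response. The URL and the together_api_key payload key are illustrative assumptions.

import json

import httpx

resp = httpx.get(
    "http://localhost:8321/v1/models",
    headers={"X-LlamaStack-Provider-Data": json.dumps({"together_api_key": "sk-..."})},
)
for model in resp.json()["data"]:
    print(model["id"])  # registry models plus the caller's dynamic models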
@ -14,6 +14,7 @@ from typing import Any
import yaml

from llama_stack.apis.agents import Agents
from llama_stack.apis.batches import Batches
from llama_stack.apis.benchmarks import Benchmarks from llama_stack.apis.benchmarks import Benchmarks
from llama_stack.apis.conversations import Conversations from llama_stack.apis.conversations import Conversations
from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasetio import DatasetIO
@ -63,6 +64,7 @@ class LlamaStack(
Providers, Providers,
Inference, Inference,
Agents, Agents,
Batches,
Safety, Safety,
SyntheticDataGeneration, SyntheticDataGeneration,
Datasets, Datasets,
@@ -11,6 +11,7 @@ import uuid
 import warnings
 from collections.abc import AsyncGenerator
 from datetime import UTC, datetime
+from typing import Any, cast
 import httpx
@@ -125,12 +126,12 @@ class ChatAgent(ShieldRunnerMixin):
         )
     def turn_to_messages(self, turn: Turn) -> list[Message]:
-        messages = []
+        messages: list[Message] = []
         # NOTE: if a toolcall response is in a step, we do not add it when processing the input messages
         tool_call_ids = set()
         for step in turn.steps:
-            if step.step_type == StepType.tool_execution.value:
+            if step.step_type == StepType.tool_execution.value and isinstance(step, ToolExecutionStep):
                 for response in step.tool_responses:
                     tool_call_ids.add(response.call_id)
@@ -149,9 +150,9 @@ class ChatAgent(ShieldRunnerMixin):
             messages.append(msg)
         for step in turn.steps:
-            if step.step_type == StepType.inference.value:
+            if step.step_type == StepType.inference.value and isinstance(step, InferenceStep):
                 messages.append(step.model_response)
-            elif step.step_type == StepType.tool_execution.value:
+            elif step.step_type == StepType.tool_execution.value and isinstance(step, ToolExecutionStep):
                 for response in step.tool_responses:
                     messages.append(
                         ToolResponseMessage(
@@ -159,8 +160,8 @@ class ChatAgent(ShieldRunnerMixin):
                             content=response.content,
                         )
                     )
-            elif step.step_type == StepType.shield_call.value:
-                if step.violation:
+            elif step.step_type == StepType.shield_call.value and isinstance(step, ShieldCallStep):
+                if step.violation and step.violation.user_message:
                     # CompletionMessage itself in the ShieldResponse
                     messages.append(
                         CompletionMessage(
@@ -174,7 +175,7 @@ class ChatAgent(ShieldRunnerMixin):
         return await self.storage.create_session(name)
     async def get_messages_from_turns(self, turns: list[Turn]) -> list[Message]:
-        messages = []
+        messages: list[Message] = []
         if self.agent_config.instructions != "":
             messages.append(SystemMessage(content=self.agent_config.instructions))
@@ -231,7 +232,9 @@ class ChatAgent(ShieldRunnerMixin):
         steps = []
         messages = await self.get_messages_from_turns(turns)
         if is_resume:
+            assert isinstance(request, AgentTurnResumeRequest)
             tool_response_messages = [
                 ToolResponseMessage(call_id=x.call_id, content=x.content) for x in request.tool_responses
             ]
@@ -252,42 +255,52 @@ class ChatAgent(ShieldRunnerMixin):
             in_progress_tool_call_step = await self.storage.get_in_progress_tool_call_step(
                 request.session_id, request.turn_id
             )
-            now = datetime.now(UTC).isoformat()
+            now_dt = datetime.now(UTC)
             tool_execution_step = ToolExecutionStep(
                 step_id=(in_progress_tool_call_step.step_id if in_progress_tool_call_step else str(uuid.uuid4())),
                 turn_id=request.turn_id,
                 tool_calls=(in_progress_tool_call_step.tool_calls if in_progress_tool_call_step else []),
                 tool_responses=request.tool_responses,
-                completed_at=now,
-                started_at=(in_progress_tool_call_step.started_at if in_progress_tool_call_step else now),
+                completed_at=now_dt,
+                started_at=(in_progress_tool_call_step.started_at if in_progress_tool_call_step else now_dt),
             )
             steps.append(tool_execution_step)
             yield AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseStepCompletePayload(
-                        step_type=StepType.tool_execution.value,
+                        step_type=StepType.tool_execution,
                         step_id=tool_execution_step.step_id,
                         step_details=tool_execution_step,
                     )
                 )
             )
-            input_messages = last_turn.input_messages
+            # Cast needed due to list invariance - last_turn.input_messages is the right type
+            input_messages = last_turn.input_messages  # type: ignore[assignment]
-            turn_id = request.turn_id
+            actual_turn_id = request.turn_id
             start_time = last_turn.started_at
         else:
+            assert isinstance(request, AgentTurnCreateRequest)
             messages.extend(request.messages)
-            start_time = datetime.now(UTC).isoformat()
+            start_time = datetime.now(UTC)
-            input_messages = request.messages
+            # Cast needed due to list invariance - request.messages is the right type
+            input_messages = request.messages  # type: ignore[assignment]
+            # Use the generated turn_id from beginning of function
+            actual_turn_id = turn_id if turn_id else str(uuid.uuid4())
         output_message = None
+        req_documents = request.documents if isinstance(request, AgentTurnCreateRequest) and not is_resume else None
+        req_sampling = (
+            self.agent_config.sampling_params if self.agent_config.sampling_params is not None else SamplingParams()
+        )
         async for chunk in self.run(
             session_id=request.session_id,
-            turn_id=turn_id,
+            turn_id=actual_turn_id,
             input_messages=messages,
-            sampling_params=self.agent_config.sampling_params,
+            sampling_params=req_sampling,
             stream=request.stream,
-            documents=request.documents if not is_resume else None,
+            documents=req_documents,
         ):
             if isinstance(chunk, CompletionMessage):
                 output_message = chunk
@@ -295,20 +308,23 @@ class ChatAgent(ShieldRunnerMixin):
             assert isinstance(chunk, AgentTurnResponseStreamChunk), f"Unexpected type {type(chunk)}"
             event = chunk.event
-            if event.payload.event_type == AgentTurnResponseEventType.step_complete.value:
-                steps.append(event.payload.step_details)
+            if event.payload.event_type == AgentTurnResponseEventType.step_complete.value and hasattr(
+                event.payload, "step_details"
+            ):
+                step_details = event.payload.step_details
+                steps.append(step_details)
             yield chunk
         assert output_message is not None
         turn = Turn(
-            turn_id=turn_id,
+            turn_id=actual_turn_id,
             session_id=request.session_id,
-            input_messages=input_messages,
+            input_messages=input_messages,  # type: ignore[arg-type]
             output_message=output_message,
             started_at=start_time,
-            completed_at=datetime.now(UTC).isoformat(),
+            completed_at=datetime.now(UTC),
             steps=steps,
         )
         await self.storage.add_turn_to_session(request.session_id, turn)
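Note: a recurring change in this file is storing datetime.now(UTC) objects instead of pre-rendered .isoformat() strings; Pydantic renders ISO 8601 at serialization time, so keeping the object preserves comparability until the boundary. A minimal sketch of why that is safe; the Step model below is an illustrative stand-in:

    # Minimal sketch: datetime fields serialize to ISO 8601 on dump, so there is
    # no need to call .isoformat() at construction time. `Step` is illustrative.
    from datetime import UTC, datetime
    from pydantic import BaseModel

    class Step(BaseModel):
        started_at: datetime
        completed_at: datetime

    step = Step(started_at=datetime.now(UTC), completed_at=datetime.now(UTC))
    assert step.completed_at >= step.started_at  # datetimes stay comparable
    print(step.model_dump_json())                # timestamps rendered as ISO 8601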
@@ -345,9 +361,9 @@ class ChatAgent(ShieldRunnerMixin):
         # return a "final value" for the `yield from` statement. we simulate that by yielding a
         # final boolean (to see whether an exception happened) and then explicitly testing for it.
-        if len(self.input_shields) > 0:
+        if self.input_shields:
             async for res in self.run_multiple_shields_wrapper(
-                turn_id, input_messages, self.input_shields, "user-input"
+                turn_id, cast(list[OpenAIMessageParam], input_messages), self.input_shields, "user-input"
             ):
                 if isinstance(res, bool):
                     return
@@ -374,9 +390,9 @@ class ChatAgent(ShieldRunnerMixin):
         # for output shields run on the full input and output combination
         messages = input_messages + [final_response]
-        if len(self.output_shields) > 0:
+        if self.output_shields:
             async for res in self.run_multiple_shields_wrapper(
-                turn_id, messages, self.output_shields, "assistant-output"
+                turn_id, cast(list[OpenAIMessageParam], messages), self.output_shields, "assistant-output"
             ):
                 if isinstance(res, bool):
                     return
@@ -388,7 +404,7 @@ class ChatAgent(ShieldRunnerMixin):
     async def run_multiple_shields_wrapper(
         self,
         turn_id: str,
-        messages: list[Message],
+        messages: list[OpenAIMessageParam],
         shields: list[str],
         touchpoint: str,
     ) -> AsyncGenerator:
@@ -402,12 +418,12 @@ class ChatAgent(ShieldRunnerMixin):
             return
         step_id = str(uuid.uuid4())
-        shield_call_start_time = datetime.now(UTC).isoformat()
+        shield_call_start_time = datetime.now(UTC)
         try:
             yield AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseStepStartPayload(
-                        step_type=StepType.shield_call.value,
+                        step_type=StepType.shield_call,
                         step_id=step_id,
                         metadata=dict(touchpoint=touchpoint),
                     )
@@ -419,14 +435,14 @@ class ChatAgent(ShieldRunnerMixin):
             yield AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseStepCompletePayload(
-                        step_type=StepType.shield_call.value,
+                        step_type=StepType.shield_call,
                         step_id=step_id,
                         step_details=ShieldCallStep(
                             step_id=step_id,
                             turn_id=turn_id,
                             violation=e.violation,
                             started_at=shield_call_start_time,
-                            completed_at=datetime.now(UTC).isoformat(),
+                            completed_at=datetime.now(UTC),
                         ),
                     )
                 )
@@ -443,14 +459,14 @@ class ChatAgent(ShieldRunnerMixin):
             yield AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseStepCompletePayload(
-                        step_type=StepType.shield_call.value,
+                        step_type=StepType.shield_call,
                         step_id=step_id,
                         step_details=ShieldCallStep(
                             step_id=step_id,
                             turn_id=turn_id,
                             violation=None,
                             started_at=shield_call_start_time,
-                            completed_at=datetime.now(UTC).isoformat(),
+                            completed_at=datetime.now(UTC),
                         ),
                     )
                 )
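Note: the StepType.shield_call.value -> StepType.shield_call changes rely on Pydantic coercing raw values into enum-typed fields, so passing the member directly type-checks while remaining wire-compatible. A sketch under the assumption that the payload field is enum-typed:

    # Sketch assuming the payload's step_type field is typed with the enum itself.
    from enum import Enum
    from pydantic import BaseModel

    class StepType(Enum):
        shield_call = "shield_call"

    class StepStartPayload(BaseModel):
        step_type: StepType

    # Both construct the same payload; the member is the type-checker-friendly spelling.
    a = StepStartPayload(step_type=StepType.shield_call)
    b = StepStartPayload(step_type="shield_call")  # Pydantic coerces the raw value
    assert a == b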
@@ -496,21 +512,22 @@ class ChatAgent(ShieldRunnerMixin):
             else:
                 self.tool_name_to_args[tool_name]["vector_store_ids"].append(session_info.vector_store_id)
-        output_attachments = []
+        output_attachments: list[Attachment] = []
         n_iter = await self.storage.get_num_infer_iters_in_turn(session_id, turn_id) or 0
         # Build a map of custom tools to their definitions for faster lookup
         client_tools = {}
-        for tool in self.agent_config.client_tools:
-            client_tools[tool.name] = tool
+        if self.agent_config.client_tools:
+            for tool in self.agent_config.client_tools:
+                client_tools[tool.name] = tool
         while True:
             step_id = str(uuid.uuid4())
-            inference_start_time = datetime.now(UTC).isoformat()
+            inference_start_time = datetime.now(UTC)
             yield AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseStepStartPayload(
-                        step_type=StepType.inference.value,
+                        step_type=StepType.inference,
                         step_id=step_id,
                     )
                 )
@@ -538,7 +555,7 @@ class ChatAgent(ShieldRunnerMixin):
                 else:
                     return value
-            def _add_type(openai_msg: dict) -> OpenAIMessageParam:
+            def _add_type(openai_msg: Any) -> OpenAIMessageParam:
                 # Serialize any nested Pydantic models to plain dicts
                 openai_msg = _serialize_nested(openai_msg)
@@ -588,7 +605,7 @@ class ChatAgent(ShieldRunnerMixin):
                 messages=openai_messages,
                 tools=openai_tools if openai_tools else None,
                 tool_choice=tool_choice,
-                response_format=self.agent_config.response_format,
+                response_format=self.agent_config.response_format,  # type: ignore[arg-type]
                 temperature=temperature,
                 top_p=top_p,
                 max_tokens=max_tokens,
@@ -598,7 +615,8 @@ class ChatAgent(ShieldRunnerMixin):
             # Convert OpenAI stream back to Llama Stack format
             response_stream = convert_openai_chat_completion_stream(
-                openai_stream, enable_incremental_tool_calls=True
+                openai_stream,  # type: ignore[arg-type]
+                enable_incremental_tool_calls=True,
             )
             async for chunk in response_stream:
@@ -620,7 +638,7 @@ class ChatAgent(ShieldRunnerMixin):
                     yield AgentTurnResponseStreamChunk(
                         event=AgentTurnResponseEvent(
                             payload=AgentTurnResponseStepProgressPayload(
-                                step_type=StepType.inference.value,
+                                step_type=StepType.inference,
                                 step_id=step_id,
                                 delta=delta,
                             )
@@ -633,7 +651,7 @@ class ChatAgent(ShieldRunnerMixin):
                     yield AgentTurnResponseStreamChunk(
                         event=AgentTurnResponseEvent(
                             payload=AgentTurnResponseStepProgressPayload(
-                                step_type=StepType.inference.value,
+                                step_type=StepType.inference,
                                 step_id=step_id,
                                 delta=delta,
                             )
@@ -651,7 +669,9 @@ class ChatAgent(ShieldRunnerMixin):
                 output_attr = json.dumps(
                     {
                         "content": content,
-                        "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
+                        "tool_calls": [
+                            json.loads(t.model_dump_json()) for t in tool_calls if isinstance(t, ToolCall)
+                        ],
                     }
                 )
                 span.set_attribute("output", output_attr)
@@ -667,16 +687,18 @@ class ChatAgent(ShieldRunnerMixin):
             if tool_calls:
                 content = ""
+            # Filter out string tool calls for CompletionMessage (only keep ToolCall objects)
+            valid_tool_calls = [t for t in tool_calls if isinstance(t, ToolCall)]
             message = CompletionMessage(
                 content=content,
                 stop_reason=stop_reason,
-                tool_calls=tool_calls,
+                tool_calls=valid_tool_calls if valid_tool_calls else None,
             )
             yield AgentTurnResponseStreamChunk(
                 event=AgentTurnResponseEvent(
                     payload=AgentTurnResponseStepCompletePayload(
-                        step_type=StepType.inference.value,
+                        step_type=StepType.inference,
                         step_id=step_id,
                         step_details=InferenceStep(
                             # somewhere deep, we are re-assigning message or closing over some
@@ -686,13 +708,14 @@ class ChatAgent(ShieldRunnerMixin):
                             turn_id=turn_id,
                             model_response=copy.deepcopy(message),
                             started_at=inference_start_time,
-                            completed_at=datetime.now(UTC).isoformat(),
+                            completed_at=datetime.now(UTC),
                         ),
                     )
                 )
             )
-            if n_iter >= self.agent_config.max_infer_iters:
+            max_iters = self.agent_config.max_infer_iters if self.agent_config.max_infer_iters is not None else 10
+            if n_iter >= max_iters:
                 logger.info(f"done with MAX iterations ({n_iter}), exiting.")
                 # NOTE: mark end_of_turn to indicate to client that we are done with the turn
                 # Do not continue the tool call loop after this point
@@ -705,14 +728,16 @@ class ChatAgent(ShieldRunnerMixin):
                 yield message
                 break
-            if len(message.tool_calls) == 0:
+            if not message.tool_calls or len(message.tool_calls) == 0:
                 if stop_reason == StopReason.end_of_turn:
                     # TODO: UPDATE RETURN TYPE TO SEND A TUPLE OF (MESSAGE, ATTACHMENTS)
                     if len(output_attachments) > 0:
                         if isinstance(message.content, list):
-                            message.content += output_attachments
+                            # List invariance - attachments are compatible at runtime
+                            message.content += output_attachments  # type: ignore[arg-type]
                         else:
-                            message.content = [message.content] + output_attachments
+                            # List invariance - attachments are compatible at runtime
+                            message.content = [message.content] + output_attachments  # type: ignore[assignment]
                     yield message
                 else:
                     logger.debug(f"completion message with EOM (iter: {n_iter}): {str(message)}")
@@ -725,11 +750,12 @@ class ChatAgent(ShieldRunnerMixin):
             non_client_tool_calls = []
             # Separate client and non-client tool calls
-            for tool_call in message.tool_calls:
-                if tool_call.tool_name in client_tools:
-                    client_tool_calls.append(tool_call)
-                else:
-                    non_client_tool_calls.append(tool_call)
+            if message.tool_calls:
+                for tool_call in message.tool_calls:
+                    if tool_call.tool_name in client_tools:
+                        client_tool_calls.append(tool_call)
+                    else:
+                        non_client_tool_calls.append(tool_call)
             # Process non-client tool calls first
             for tool_call in non_client_tool_calls:
@@ -737,7 +763,7 @@ class ChatAgent(ShieldRunnerMixin):
                 yield AgentTurnResponseStreamChunk(
                     event=AgentTurnResponseEvent(
                         payload=AgentTurnResponseStepStartPayload(
-                            step_type=StepType.tool_execution.value,
+                            step_type=StepType.tool_execution,
                             step_id=step_id,
                         )
                     )
@@ -746,7 +772,7 @@ class ChatAgent(ShieldRunnerMixin):
                 yield AgentTurnResponseStreamChunk(
                     event=AgentTurnResponseEvent(
                         payload=AgentTurnResponseStepProgressPayload(
-                            step_type=StepType.tool_execution.value,
+                            step_type=StepType.tool_execution,
                             step_id=step_id,
                             delta=ToolCallDelta(
                                 parse_status=ToolCallParseStatus.in_progress,
@@ -766,7 +792,7 @@ class ChatAgent(ShieldRunnerMixin):
                     if self.telemetry_enabled
                     else {},
                 ) as span:
-                    tool_execution_start_time = datetime.now(UTC).isoformat()
+                    tool_execution_start_time = datetime.now(UTC)
                     tool_result = await self.execute_tool_call_maybe(
                         session_id,
                         tool_call,
@@ -796,14 +822,14 @@ class ChatAgent(ShieldRunnerMixin):
                             )
                         ],
                         started_at=tool_execution_start_time,
-                        completed_at=datetime.now(UTC).isoformat(),
+                        completed_at=datetime.now(UTC),
                     )
                     # Yield the step completion event
                     yield AgentTurnResponseStreamChunk(
                         event=AgentTurnResponseEvent(
                             payload=AgentTurnResponseStepCompletePayload(
-                                step_type=StepType.tool_execution.value,
+                                step_type=StepType.tool_execution,
                                 step_id=step_id,
                                 step_details=tool_execution_step,
                             )
@@ -833,7 +859,7 @@ class ChatAgent(ShieldRunnerMixin):
                     turn_id=turn_id,
                     tool_calls=client_tool_calls,
                     tool_responses=[],
-                    started_at=datetime.now(UTC).isoformat(),
+                    started_at=datetime.now(UTC),
                 ),
             )
@@ -868,19 +894,20 @@ class ChatAgent(ShieldRunnerMixin):
         toolgroup_to_args = toolgroup_to_args or {}
-        tool_name_to_def = {}
+        tool_name_to_def: dict[str, ToolDefinition] = {}
         tool_name_to_args = {}
-        for tool_def in self.agent_config.client_tools:
-            if tool_name_to_def.get(tool_def.name, None):
-                raise ValueError(f"Tool {tool_def.name} already exists")
+        if self.agent_config.client_tools:
+            for tool_def in self.agent_config.client_tools:
+                if tool_name_to_def.get(tool_def.name, None):
+                    raise ValueError(f"Tool {tool_def.name} already exists")
                 # Use input_schema from ToolDef directly
                 tool_name_to_def[tool_def.name] = ToolDefinition(
                     tool_name=tool_def.name,
                     description=tool_def.description,
                     input_schema=tool_def.input_schema,
                 )
         for toolgroup_name_with_maybe_tool_name in agent_config_toolgroups:
             toolgroup_name, input_tool_name = self._parse_toolgroup_name(toolgroup_name_with_maybe_tool_name)
             tools = await self.tool_groups_api.list_tools(toolgroup_id=toolgroup_name)
@@ -908,15 +935,17 @@ class ChatAgent(ShieldRunnerMixin):
                 else:
                     identifier = None
-                if tool_name_to_def.get(identifier, None):
-                    raise ValueError(f"Tool {identifier} already exists")
                 if identifier:
-                    tool_name_to_def[identifier] = ToolDefinition(
-                        tool_name=identifier,
+                    # Convert BuiltinTool to string for dictionary key
+                    identifier_str = identifier.value if isinstance(identifier, BuiltinTool) else identifier
+                    if tool_name_to_def.get(identifier_str, None):
+                        raise ValueError(f"Tool {identifier_str} already exists")
+                    tool_name_to_def[identifier_str] = ToolDefinition(
+                        tool_name=identifier_str,
                         description=tool_def.description,
                         input_schema=tool_def.input_schema,
                     )
-                    tool_name_to_args[identifier] = toolgroup_to_args.get(toolgroup_name, {})
+                    tool_name_to_args[identifier_str] = toolgroup_to_args.get(toolgroup_name, {})
         self.tool_defs, self.tool_name_to_args = (
             list(tool_name_to_def.values()),
@@ -966,14 +995,17 @@ class ChatAgent(ShieldRunnerMixin):
         except json.JSONDecodeError as e:
             raise ValueError(f"Failed to parse arguments for tool call: {tool_call.arguments}") from e
-        result = await self.tool_runtime_api.invoke_tool(
-            tool_name=tool_name_str,
-            kwargs={
-                "session_id": session_id,
-                # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
-                **args,
-                **self.tool_name_to_args.get(tool_name_str, {}),
-            },
+        result = cast(
+            ToolInvocationResult,
+            await self.tool_runtime_api.invoke_tool(
+                tool_name=tool_name_str,
+                kwargs={
+                    "session_id": session_id,
+                    # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
+                    **args,
+                    **self.tool_name_to_args.get(tool_name_str, {}),
+                },
+            ),
         )
         logger.debug(f"tool call {tool_name_str} completed with result: {result}")
         return result
@@ -1017,7 +1049,7 @@ def _interpret_content_as_attachment(
         snippet = match.group(1)
         data = json.loads(snippet)
         return Attachment(
-            url=URL(uri="file://" + data["filepath"]),
+            content=URL(uri="file://" + data["filepath"]),
            mime_type=data["mimetype"],
        )
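Note: the identifier_str normalization above is the standard fix for mixing enum members and plain strings as dict keys: convert to one canonical representation before lookup. A small sketch of the pattern; the BuiltinTool member shown is an illustrative assumption:

    # Sketch of normalizing enum-or-string identifiers before use as dict keys;
    # the BuiltinTool member here is an illustrative assumption.
    from enum import Enum

    class BuiltinTool(Enum):
        brave_search = "brave_search"

    def normalize(identifier: "BuiltinTool | str") -> str:
        return identifier.value if isinstance(identifier, BuiltinTool) else identifier

    tools: dict[str, str] = {}
    for ident in (BuiltinTool.brave_search, "my_custom_tool"):
        key = normalize(ident)
        if key in tools:
            raise ValueError(f"Tool {key} already exists")
        tools[key] = "registered"

    print(sorted(tools))  # ['brave_search', 'my_custom_tool']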
@@ -21,6 +21,7 @@ from llama_stack.apis.agents import (
     Document,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
+    OpenAIDeleteResponseObject,
     OpenAIResponseInput,
     OpenAIResponseInputTool,
     OpenAIResponseObject,
@@ -141,7 +142,7 @@ class MetaReferenceAgentsImpl(Agents):
             persistence_store=(
                 self.persistence_store if agent_info.enable_session_persistence else self.in_memory_store
             ),
-            created_at=agent_info.created_at,
+            created_at=agent_info.created_at.isoformat(),
             policy=self.policy,
             telemetry_enabled=self.telemetry_enabled,
         )
@@ -163,9 +164,9 @@ class MetaReferenceAgentsImpl(Agents):
         agent_id: str,
         session_id: str,
         messages: list[UserMessage | ToolResponseMessage],
-        toolgroups: list[AgentToolGroup] | None = None,
-        documents: list[Document] | None = None,
         stream: bool | None = False,
+        documents: list[Document] | None = None,
+        toolgroups: list[AgentToolGroup] | None = None,
         tool_config: ToolConfig | None = None,
     ) -> AsyncGenerator:
         request = AgentTurnCreateRequest(
@@ -221,6 +222,8 @@ class MetaReferenceAgentsImpl(Agents):
     async def get_agents_turn(self, agent_id: str, session_id: str, turn_id: str) -> Turn:
         agent = await self._get_agent_impl(agent_id)
         turn = await agent.storage.get_session_turn(session_id, turn_id)
+        if turn is None:
+            raise ValueError(f"Turn {turn_id} not found in session {session_id}")
         return turn
     async def get_agents_step(self, agent_id: str, session_id: str, turn_id: str, step_id: str) -> AgentStepResponse:
@@ -232,13 +235,15 @@ class MetaReferenceAgentsImpl(Agents):
     async def get_agents_session(
         self,
-        agent_id: str,
         session_id: str,
+        agent_id: str,
         turn_ids: list[str] | None = None,
     ) -> Session:
         agent = await self._get_agent_impl(agent_id)
         session_info = await agent.storage.get_session_info(session_id)
+        if session_info is None:
+            raise ValueError(f"Session {session_id} not found")
         turns = await agent.storage.get_session_turns(session_id)
         if turn_ids:
             turns = [turn for turn in turns if turn.turn_id in turn_ids]
@@ -249,7 +254,7 @@ class MetaReferenceAgentsImpl(Agents):
             started_at=session_info.started_at,
         )
-    async def delete_agents_session(self, agent_id: str, session_id: str) -> None:
+    async def delete_agents_session(self, session_id: str, agent_id: str) -> None:
         agent = await self._get_agent_impl(agent_id)
         # Delete turns first, then the session
@@ -302,7 +307,7 @@ class MetaReferenceAgentsImpl(Agents):
         agent = Agent(
             agent_id=agent_id,
             agent_config=chat_agent.agent_config,
-            created_at=chat_agent.created_at,
+            created_at=datetime.fromisoformat(chat_agent.created_at),
         )
         return agent
@@ -323,6 +328,7 @@ class MetaReferenceAgentsImpl(Agents):
         self,
         response_id: str,
     ) -> OpenAIResponseObject:
+        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
         return await self.openai_responses_impl.get_openai_response(response_id)
     async def create_openai_response(
@@ -342,7 +348,8 @@ class MetaReferenceAgentsImpl(Agents):
         max_infer_iters: int | None = 10,
         guardrails: list[ResponseGuardrail] | None = None,
     ) -> OpenAIResponseObject:
-        return await self.openai_responses_impl.create_openai_response(
+        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
+        result = await self.openai_responses_impl.create_openai_response(
             input,
             model,
             prompt,
@@ -358,6 +365,7 @@ class MetaReferenceAgentsImpl(Agents):
             max_infer_iters,
             guardrails,
         )
+        return result  # type: ignore[no-any-return]
     async def list_openai_responses(
         self,
@@ -366,6 +374,7 @@ class MetaReferenceAgentsImpl(Agents):
         model: str | None = None,
         order: Order | None = Order.desc,
     ) -> ListOpenAIResponseObject:
+        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
        return await self.openai_responses_impl.list_openai_responses(after, limit, model, order)
    async def list_openai_response_input_items(
@@ -377,9 +386,11 @@ class MetaReferenceAgentsImpl(Agents):
        limit: int | None = 20,
        order: Order | None = Order.desc,
    ) -> ListOpenAIResponseInputItem:
+        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
        return await self.openai_responses_impl.list_openai_response_input_items(
            response_id, after, before, include, limit, order
        )
-    async def delete_openai_response(self, response_id: str) -> None:
+    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
+        assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
        return await self.openai_responses_impl.delete_openai_response(response_id)
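Note: the repeated assert self.openai_responses_impl is not None lines are a narrowing idiom: when an attribute is declared X | None but is guaranteed to be set after initialization, the assert both documents the invariant and lets mypy treat it as X afterwards. A minimal sketch:

    # Minimal sketch of assert-based narrowing for an Optional attribute.
    class ResponsesImpl:
        async def get(self, response_id: str) -> str:
            return f"response:{response_id}"

    class AgentsImpl:
        def __init__(self) -> None:
            # Declared optional because it is wired up in a separate initialize() step.
            self.openai_responses_impl: ResponsesImpl | None = None

        async def get_openai_response(self, response_id: str) -> str:
            assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
            # After the assert, mypy narrows the attribute to ResponsesImpl.
            return await self.openai_responses_impl.get(response_id)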
@@ -6,12 +6,14 @@
 import json
 import uuid
+from dataclasses import dataclass
 from datetime import UTC, datetime
 from llama_stack.apis.agents import AgentConfig, Session, ToolExecutionStep, Turn
 from llama_stack.apis.common.errors import SessionNotFoundError
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
-from llama_stack.core.access_control.datatypes import AccessRule
+from llama_stack.core.access_control.conditions import User as ProtocolUser
+from llama_stack.core.access_control.datatypes import AccessRule, Action
 from llama_stack.core.datatypes import User
 from llama_stack.core.request_headers import get_authenticated_user
 from llama_stack.log import get_logger
@@ -33,6 +35,15 @@ class AgentInfo(AgentConfig):
     created_at: datetime
+@dataclass
+class SessionResource:
+    """Concrete implementation of ProtectedResource for session access control."""
+    type: str
+    identifier: str
+    owner: ProtocolUser  # Use the protocol type for structural compatibility
 class AgentPersistence:
     def __init__(self, agent_id: str, kvstore: KVStore, policy: list[AccessRule]):
         self.agent_id = agent_id
@@ -53,8 +64,15 @@ class AgentPersistence:
             turns=[],
             identifier=name,  # should this be qualified in any way?
         )
-        if not is_action_allowed(self.policy, "create", session_info, user):
-            raise AccessDeniedError("create", session_info, user)
+        # Only perform access control if we have an authenticated user
+        if user is not None and session_info.identifier is not None:
+            resource = SessionResource(
+                type=session_info.type,
+                identifier=session_info.identifier,
+                owner=user,
+            )
+            if not is_action_allowed(self.policy, Action.CREATE, resource, user):
+                raise AccessDeniedError(Action.CREATE, resource, user)
         await self.kvstore.set(
             key=f"session:{self.agent_id}:{session_id}",
@@ -62,7 +80,7 @@ class AgentPersistence:
         )
         return session_id
-    async def get_session_info(self, session_id: str) -> AgentSessionInfo:
+    async def get_session_info(self, session_id: str) -> AgentSessionInfo | None:
         value = await self.kvstore.get(
             key=f"session:{self.agent_id}:{session_id}",
         )
@@ -83,7 +101,22 @@ class AgentPersistence:
         if not hasattr(session_info, "access_attributes") and not hasattr(session_info, "owner"):
             return True
-        return is_action_allowed(self.policy, "read", session_info, get_authenticated_user())
+        # Get current user - if None, skip access control (e.g., in tests)
+        user = get_authenticated_user()
+        if user is None:
+            return True
+        # Access control requires identifier and owner to be set
+        if session_info.identifier is None or session_info.owner is None:
+            return True
+        # At this point, both identifier and owner are guaranteed to be non-None
+        resource = SessionResource(
+            type=session_info.type,
+            identifier=session_info.identifier,
+            owner=session_info.owner,
+        )
+        return is_action_allowed(self.policy, Action.READ, resource, user)
     async def get_session_if_accessible(self, session_id: str) -> AgentSessionInfo | None:
         """Get session info if the user has access to it. For internal use by sub-session methods."""
@@ -91,7 +91,8 @@ class OpenAIResponsesImpl:
         input: str | list[OpenAIResponseInput],
         previous_response: _OpenAIResponseObjectWithInputAndMessages,
     ):
-        new_input_items = previous_response.input.copy()
+        # Convert Sequence to list for mutation
+        new_input_items = list(previous_response.input)
         new_input_items.extend(previous_response.output)
         if isinstance(input, str):
@@ -107,7 +108,7 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None,
         previous_response_id: str | None,
         conversation: str | None,
-    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam]]:
+    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam], ToolContext]:
         """Process input with optional previous response context.
         Returns:
@@ -208,6 +209,9 @@ class OpenAIResponsesImpl:
         messages: list[OpenAIMessageParam],
     ) -> None:
         new_input_id = f"msg_{uuid.uuid4()}"
+        # Type input_items_data as the full OpenAIResponseInput union to avoid list invariance issues
+        input_items_data: list[OpenAIResponseInput] = []
         if isinstance(input, str):
             # synthesize a message from the input string
             input_content = OpenAIResponseInputMessageContentText(text=input)
@@ -219,7 +223,6 @@ class OpenAIResponsesImpl:
             input_items_data = [input_content_item]
         else:
             # we already have a list of messages
-            input_items_data = []
             for input_item in input:
                 if isinstance(input_item, OpenAIResponseMessage):
                     # These may or may not already have an id, so dump to dict, check for id, and add if missing
@@ -251,7 +254,7 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None = None,
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,
-        guardrails: list[ResponseGuardrailSpec] | None = None,
+        guardrails: list[str | ResponseGuardrailSpec] | None = None,
     ):
         stream = bool(stream)
         text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
@@ -289,16 +292,19 @@ class OpenAIResponsesImpl:
         failed_response = None
         async for stream_chunk in stream_gen:
-            if stream_chunk.type in {"response.completed", "response.incomplete"}:
-                if final_response is not None:
-                    raise ValueError(
-                        "The response stream produced multiple terminal responses! "
-                        f"Earlier response from {final_event_type}"
-                    )
-                final_response = stream_chunk.response
-                final_event_type = stream_chunk.type
-            elif stream_chunk.type == "response.failed":
-                failed_response = stream_chunk.response
+            match stream_chunk.type:
+                case "response.completed" | "response.incomplete":
+                    if final_response is not None:
+                        raise ValueError(
+                            "The response stream produced multiple terminal responses! "
+                            f"Earlier response from {final_event_type}"
+                        )
+                    final_response = stream_chunk.response
+                    final_event_type = stream_chunk.type
+                case "response.failed":
+                    failed_response = stream_chunk.response
+                case _:
+                    pass  # Other event types don't have .response
         if failed_response is not None:
             error_message = (
@@ -326,6 +332,11 @@ class OpenAIResponsesImpl:
         max_infer_iters: int | None = 10,
         guardrail_ids: list[str] | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
+        # These should never be None when called from create_openai_response (which sets defaults)
+        # but we assert here to help mypy understand the types
+        assert text is not None, "text must not be None"
+        assert max_infer_iters is not None, "max_infer_iters must not be None"
         # Input preprocessing
         all_input, messages, tool_context = await self._process_input_with_previous_response(
             input, tools, previous_response_id, conversation
@@ -368,16 +379,19 @@ class OpenAIResponsesImpl:
         final_response = None
         failed_response = None
-        output_items = []
+        # Type as ConversationItem to avoid list invariance issues
+        output_items: list[ConversationItem] = []
         async for stream_chunk in orchestrator.create_response():
-            if stream_chunk.type in {"response.completed", "response.incomplete"}:
-                final_response = stream_chunk.response
-            elif stream_chunk.type == "response.failed":
-                failed_response = stream_chunk.response
-            if stream_chunk.type == "response.output_item.done":
-                item = stream_chunk.item
-                output_items.append(item)
+            match stream_chunk.type:
+                case "response.completed" | "response.incomplete":
+                    final_response = stream_chunk.response
+                case "response.failed":
+                    failed_response = stream_chunk.response
+                case "response.output_item.done":
+                    item = stream_chunk.item
+                    output_items.append(item)
+                case _:
+                    pass  # Other event types
             # Store and sync before yielding terminal events
             # This ensures the storage/syncing happens even if the consumer breaks after receiving the event
@@ -410,7 +424,8 @@ class OpenAIResponsesImpl:
         self, conversation_id: str, input: str | list[OpenAIResponseInput] | None, output_items: list[ConversationItem]
     ) -> None:
         """Sync content and response messages to the conversation."""
-        conversation_items = []
+        # Type as ConversationItem union to avoid list invariance issues
+        conversation_items: list[ConversationItem] = []
         if isinstance(input, str):
             conversation_items.append(
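Note: the if/elif chains over stream_chunk.type become match statements with or-patterns, and the case _ arm makes the "ignore everything else" branch explicit. A small sketch of the same dispatch shape, with event names mirroring the ones matched above:

    # Small sketch of the match-based dispatch used above for stream event types.
    def classify(event_type: str) -> str:
        match event_type:
            case "response.completed" | "response.incomplete":
                return "terminal"
            case "response.failed":
                return "failed"
            case _:
                return "ignored"  # other event types carry no .response payload

    assert classify("response.completed") == "terminal"
    assert classify("response.failed") == "failed"
    assert classify("response.output_text.delta") == "ignored"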
@@ -111,7 +111,7 @@ class StreamingResponseOrchestrator:
         text: OpenAIResponseText,
         max_infer_iters: int,
         tool_executor,  # Will be the tool execution logic from the main class
-        instructions: str,
+        instructions: str | None,
         safety_api,
         guardrail_ids: list[str] | None = None,
         prompt: OpenAIResponsePrompt | None = None,
@@ -128,7 +128,9 @@ class StreamingResponseOrchestrator:
         self.prompt = prompt
         self.sequence_number = 0
         # Store MCP tool mapping that gets built during tool processing
-        self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = ctx.tool_context.previous_tools or {}
+        self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = (
+            ctx.tool_context.previous_tools if ctx.tool_context else {}
+        )
         # Track final messages after all tool executions
         self.final_messages: list[OpenAIMessageParam] = []
         # mapping for annotations
@@ -229,7 +231,8 @@ class StreamingResponseOrchestrator:
         params = OpenAIChatCompletionRequestWithExtraBody(
             model=self.ctx.model,
             messages=messages,
-            tools=self.ctx.chat_tools,
+            # Pydantic models are dict-compatible but mypy treats them as distinct types
+            tools=self.ctx.chat_tools,  # type: ignore[arg-type]
             stream=True,
             temperature=self.ctx.temperature,
             response_format=response_format,
@@ -272,7 +275,12 @@ class StreamingResponseOrchestrator:
         # Handle choices with no tool calls
         for choice in current_response.choices:
-            if not (choice.message.tool_calls and self.ctx.response_tools):
+            has_tool_calls = (
+                isinstance(choice.message, OpenAIAssistantMessageParam)
+                and choice.message.tool_calls
+                and self.ctx.response_tools
+            )
+            if not has_tool_calls:
                 output_messages.append(
                     await convert_chat_choice_to_response_message(
                         choice,
@@ -722,7 +730,10 @@ class StreamingResponseOrchestrator:
                             )
                             # Accumulate arguments for final response (only for subsequent chunks)
-                            if not is_new_tool_call:
+                            if not is_new_tool_call and response_tool_call is not None:
+                                # Both should have functions since we're inside the tool_call.function check above
+                                assert response_tool_call.function is not None
+                                assert tool_call.function is not None
                                 response_tool_call.function.arguments = (
                                     response_tool_call.function.arguments or ""
                                 ) + tool_call.function.arguments
@@ -747,10 +758,13 @@ class StreamingResponseOrchestrator:
             for tool_call_index in sorted(chat_response_tool_calls.keys()):
                 tool_call = chat_response_tool_calls[tool_call_index]
                 # Ensure that arguments, if sent back to the inference provider, are not None
-                tool_call.function.arguments = tool_call.function.arguments or "{}"
+                if tool_call.function:
+                    tool_call.function.arguments = tool_call.function.arguments or "{}"
                 tool_call_item_id = tool_call_item_ids[tool_call_index]
-                final_arguments = tool_call.function.arguments
-                tool_call_name = chat_response_tool_calls[tool_call_index].function.name
+                final_arguments: str = tool_call.function.arguments or "{}" if tool_call.function else "{}"
+                func = chat_response_tool_calls[tool_call_index].function
+                tool_call_name = func.name if func else ""
                 # Check if this is an MCP tool call
                 is_mcp_tool = tool_call_name and tool_call_name in self.mcp_tool_to_server
@@ -894,12 +908,11 @@ class StreamingResponseOrchestrator:
         self.sequence_number += 1
         if tool_call.function.name and tool_call.function.name in self.mcp_tool_to_server:
-            item = OpenAIResponseOutputMessageMCPCall(
+            item: OpenAIResponseOutput = OpenAIResponseOutputMessageMCPCall(
                 arguments="",
                 name=tool_call.function.name,
                 id=matching_item_id,
                 server_label=self.mcp_tool_to_server[tool_call.function.name].server_label,
-                status="in_progress",
             )
         elif tool_call.function.name == "web_search":
             item = OpenAIResponseOutputMessageWebSearchToolCall(
@@ -1008,7 +1021,7 @@ class StreamingResponseOrchestrator:
                 description=tool.description,
                 input_schema=tool.input_schema,
             )
-            return convert_tooldef_to_openai_tool(tool_def)
+            return convert_tooldef_to_openai_tool(tool_def)  # type: ignore[return-value]  # Returns dict but ChatCompletionToolParam expects TypedDict
         # Initialize chat_tools if not already set
         if self.ctx.chat_tools is None:
@@ -1016,7 +1029,7 @@ class StreamingResponseOrchestrator:
         for input_tool in tools:
             if input_tool.type == "function":
-                self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
+                self.ctx.chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))  # type: ignore[typeddict-item,arg-type]  # Dict compatible with FunctionDefinition
             elif input_tool.type in WebSearchToolTypes:
                 tool_name = "web_search"
                 # Need to access tool_groups_api from tool_executor
@@ -1055,8 +1068,8 @@ class StreamingResponseOrchestrator:
                 if isinstance(mcp_tool.allowed_tools, list):
                     always_allowed = mcp_tool.allowed_tools
                 elif isinstance(mcp_tool.allowed_tools, AllowedToolsFilter):
-                    always_allowed = mcp_tool.allowed_tools.always
-                    never_allowed = mcp_tool.allowed_tools.never
+                    # AllowedToolsFilter only has tool_names field (not allowed/disallowed)
+                    always_allowed = mcp_tool.allowed_tools.tool_names
                 # Call list_mcp_tools
                 tool_defs = None
@@ -1088,7 +1101,7 @@ class StreamingResponseOrchestrator:
                     openai_tool = convert_tooldef_to_chat_tool(t)
                     if self.ctx.chat_tools is None:
                         self.ctx.chat_tools = []
-                    self.ctx.chat_tools.append(openai_tool)
+                    self.ctx.chat_tools.append(openai_tool)  # type: ignore[arg-type]  # Returns dict but ChatCompletionToolParam expects TypedDict
                     # Add to MCP tool mapping
                     if t.name in self.mcp_tool_to_server:
@@ -1120,13 +1133,17 @@ class StreamingResponseOrchestrator:
         self, output_messages: list[OpenAIResponseOutput]
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         # Handle all mcp tool lists from previous response that are still valid:
-        for tool in self.ctx.tool_context.previous_tool_listings:
-            async for evt in self._reuse_mcp_list_tools(tool, output_messages):
-                yield evt
-        # Process all remaining tools (including MCP tools) and emit streaming events
-        if self.ctx.tool_context.tools_to_process:
-            async for stream_event in self._process_new_tools(self.ctx.tool_context.tools_to_process, output_messages):
-                yield stream_event
+        # tool_context can be None when no tools are provided in the response request
+        if self.ctx.tool_context:
+            for tool in self.ctx.tool_context.previous_tool_listings:
+                async for evt in self._reuse_mcp_list_tools(tool, output_messages):
+                    yield evt
+            # Process all remaining tools (including MCP tools) and emit streaming events
+            if self.ctx.tool_context.tools_to_process:
+                async for stream_event in self._process_new_tools(
+                    self.ctx.tool_context.tools_to_process, output_messages
+                ):
+                    yield stream_event
     def _approval_required(self, tool_name: str) -> bool:
         if tool_name not in self.mcp_tool_to_server:
@@ -1220,7 +1237,7 @@ class StreamingResponseOrchestrator:
             openai_tool = convert_tooldef_to_openai_tool(tool_def)
             if self.ctx.chat_tools is None:
                 self.ctx.chat_tools = []
-            self.ctx.chat_tools.append(openai_tool)
+            self.ctx.chat_tools.append(openai_tool)  # type: ignore[arg-type]  # Returns dict but ChatCompletionToolParam expects TypedDict
         mcp_list_message = OpenAIResponseOutputMessageMCPListTools(
             id=f"mcp_list_{uuid.uuid4()}",
@ -7,6 +7,7 @@
import asyncio import asyncio
import json import json
from collections.abc import AsyncIterator from collections.abc import AsyncIterator
from typing import Any
from llama_stack.apis.agents.openai_responses import ( from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputToolFileSearch, OpenAIResponseInputToolFileSearch,
@ -22,6 +23,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseObjectStreamResponseWebSearchCallSearching, OpenAIResponseObjectStreamResponseWebSearchCallSearching,
OpenAIResponseOutputMessageFileSearchToolCall, OpenAIResponseOutputMessageFileSearchToolCall,
OpenAIResponseOutputMessageFileSearchToolCallResults, OpenAIResponseOutputMessageFileSearchToolCallResults,
OpenAIResponseOutputMessageMCPCall,
OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseOutputMessageWebSearchToolCall,
) )
from llama_stack.apis.common.content_types import ( from llama_stack.apis.common.content_types import (
@ -67,7 +69,7 @@ class ToolExecutor:
) -> AsyncIterator[ToolExecutionResult]: ) -> AsyncIterator[ToolExecutionResult]:
tool_call_id = tool_call.id tool_call_id = tool_call.id
function = tool_call.function function = tool_call.function
tool_kwargs = json.loads(function.arguments) if function.arguments else {} tool_kwargs = json.loads(function.arguments) if function and function.arguments else {}
if not function or not tool_call_id or not function.name: if not function or not tool_call_id or not function.name:
yield ToolExecutionResult(sequence_number=sequence_number) yield ToolExecutionResult(sequence_number=sequence_number)
@ -84,7 +86,16 @@ class ToolExecutor:
error_exc, result = await self._execute_tool(function.name, tool_kwargs, ctx, mcp_tool_to_server) error_exc, result = await self._execute_tool(function.name, tool_kwargs, ctx, mcp_tool_to_server)
# Emit completion events for tool execution # Emit completion events for tool execution
has_error = error_exc or (result and ((result.error_code and result.error_code > 0) or result.error_message)) has_error = bool(
error_exc
or (
result
and (
((error_code := getattr(result, "error_code", None)) and error_code > 0)
or getattr(result, "error_message", None)
)
)
)
async for event_result in self._emit_completion_events( async for event_result in self._emit_completion_events(
function.name, ctx, sequence_number, output_index, item_id, has_error, mcp_tool_to_server function.name, ctx, sequence_number, output_index, item_id, has_error, mcp_tool_to_server
): ):
@ -101,7 +112,9 @@ class ToolExecutor:
sequence_number=sequence_number, sequence_number=sequence_number,
final_output_message=output_message, final_output_message=output_message,
final_input_message=input_message, final_input_message=input_message,
citation_files=result.metadata.get("citation_files") if result and result.metadata else None, citation_files=(
metadata.get("citation_files") if result and (metadata := getattr(result, "metadata", None)) else None
),
) )
async def _execute_knowledge_search_via_vector_store( async def _execute_knowledge_search_via_vector_store(
@@ -188,8 +201,9 @@ class ToolExecutor:
             citation_files[file_id] = filename

+        # Cast to proper InterleavedContent type (list invariance)
         return ToolInvocationResult(
-            content=content_items,
+            content=content_items,  # type: ignore[arg-type]
             metadata={
                 "document_ids": [r.file_id for r in search_results],
                 "chunks": [r.content[0].text if r.content else "" for r in search_results],
@@ -209,51 +223,60 @@ class ToolExecutor:
     ) -> AsyncIterator[ToolExecutionResult]:
         """Emit progress events for tool execution start."""
         # Emit in_progress event based on tool type (only for tools with specific streaming events)
-        progress_event = None
         if mcp_tool_to_server and function_name in mcp_tool_to_server:
             sequence_number += 1
-            progress_event = OpenAIResponseObjectStreamResponseMcpCallInProgress(
-                item_id=item_id,
-                output_index=output_index,
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseMcpCallInProgress(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
         elif function_name == "web_search":
             sequence_number += 1
-            progress_event = OpenAIResponseObjectStreamResponseWebSearchCallInProgress(
-                item_id=item_id,
-                output_index=output_index,
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseWebSearchCallInProgress(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
         elif function_name == "knowledge_search":
             sequence_number += 1
-            progress_event = OpenAIResponseObjectStreamResponseFileSearchCallInProgress(
-                item_id=item_id,
-                output_index=output_index,
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseFileSearchCallInProgress(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
-        if progress_event:
-            yield ToolExecutionResult(stream_event=progress_event, sequence_number=sequence_number)

         # For web search, emit searching event
         if function_name == "web_search":
             sequence_number += 1
-            searching_event = OpenAIResponseObjectStreamResponseWebSearchCallSearching(
-                item_id=item_id,
-                output_index=output_index,
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseWebSearchCallSearching(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
-            yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number)

         # For file search, emit searching event
         if function_name == "knowledge_search":
             sequence_number += 1
-            searching_event = OpenAIResponseObjectStreamResponseFileSearchCallSearching(
-                item_id=item_id,
-                output_index=output_index,
+            yield ToolExecutionResult(
+                stream_event=OpenAIResponseObjectStreamResponseFileSearchCallSearching(
+                    item_id=item_id,
+                    output_index=output_index,
+                    sequence_number=sequence_number,
+                ),
                 sequence_number=sequence_number,
             )
-            yield ToolExecutionResult(stream_event=searching_event, sequence_number=sequence_number)

     async def _execute_tool(
         self,
@@ -261,7 +284,7 @@ class ToolExecutor:
         tool_kwargs: dict,
         ctx: ChatCompletionContext,
         mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
-    ) -> tuple[Exception | None, any]:
+    ) -> tuple[Exception | None, Any]:
         """Execute the tool and return error exception and result."""
         error_exc = None
         result = None
@@ -284,9 +307,13 @@ class ToolExecutor:
                 kwargs=tool_kwargs,
             )
         elif function_name == "knowledge_search":
-            response_file_search_tool = next(
-                (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)),
-                None,
+            response_file_search_tool = (
+                next(
+                    (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)),
+                    None,
+                )
+                if ctx.response_tools
+                else None
             )
             if response_file_search_tool:
                 # Use vector_stores.search API instead of knowledge_search tool
@@ -322,35 +349,34 @@ class ToolExecutor:
         mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
     ) -> AsyncIterator[ToolExecutionResult]:
         """Emit completion or failure events for tool execution."""
-        completion_event = None
         if mcp_tool_to_server and function_name in mcp_tool_to_server:
             sequence_number += 1
             if has_error:
-                completion_event = OpenAIResponseObjectStreamResponseMcpCallFailed(
+                mcp_failed_event = OpenAIResponseObjectStreamResponseMcpCallFailed(
                     sequence_number=sequence_number,
                 )
+                yield ToolExecutionResult(stream_event=mcp_failed_event, sequence_number=sequence_number)
             else:
-                completion_event = OpenAIResponseObjectStreamResponseMcpCallCompleted(
+                mcp_completed_event = OpenAIResponseObjectStreamResponseMcpCallCompleted(
                     sequence_number=sequence_number,
                 )
+                yield ToolExecutionResult(stream_event=mcp_completed_event, sequence_number=sequence_number)
         elif function_name == "web_search":
             sequence_number += 1
-            completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted(
+            web_completion_event = OpenAIResponseObjectStreamResponseWebSearchCallCompleted(
                 item_id=item_id,
                 output_index=output_index,
                 sequence_number=sequence_number,
             )
+            yield ToolExecutionResult(stream_event=web_completion_event, sequence_number=sequence_number)
         elif function_name == "knowledge_search":
             sequence_number += 1
-            completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted(
+            file_completion_event = OpenAIResponseObjectStreamResponseFileSearchCallCompleted(
                 item_id=item_id,
                 output_index=output_index,
                 sequence_number=sequence_number,
             )
+            yield ToolExecutionResult(stream_event=file_completion_event, sequence_number=sequence_number)
-        if completion_event:
-            yield ToolExecutionResult(stream_event=completion_event, sequence_number=sequence_number)
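Both emitter rewrites above drop the buffer-then-yield shape (`event = None` ... `if event: yield`) in favor of a direct `yield` in each branch, so mypy never sees a possibly-None event and every yield is fully typed. A small sketch of that shape, with a hypothetical result type standing in for ToolExecutionResult:

    from collections.abc import AsyncIterator
    from dataclasses import dataclass

    @dataclass
    class SketchResult:  # hypothetical stand-in for ToolExecutionResult
        stream_event: dict
        sequence_number: int

    async def emit_progress(kind: str, sequence_number: int) -> AsyncIterator[SketchResult]:
        # No shared sentinel variable: each branch yields its own fully-built event.
        if kind == "web_search":
            sequence_number += 1
            yield SketchResult({"type": "web_search.in_progress"}, sequence_number)
        elif kind == "knowledge_search":
            sequence_number += 1
            yield SketchResult({"type": "file_search.in_progress"}, sequence_number)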

     async def _build_result_messages(
         self,
@@ -360,21 +386,18 @@ class ToolExecutor:
         tool_kwargs: dict,
         ctx: ChatCompletionContext,
         error_exc: Exception | None,
-        result: any,
+        result: Any,
         has_error: bool,
         mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] | None = None,
-    ) -> tuple[any, any]:
+    ) -> tuple[Any, Any]:
         """Build output and input messages from tool execution results."""
         from llama_stack.providers.utils.inference.prompt_adapter import (
             interleaved_content_as_str,
         )

         # Build output message
+        message: Any
         if mcp_tool_to_server and function.name in mcp_tool_to_server:
-            from llama_stack.apis.agents.openai_responses import (
-                OpenAIResponseOutputMessageMCPCall,
-            )
             message = OpenAIResponseOutputMessageMCPCall(
                 id=item_id,
                 arguments=function.arguments,
@@ -383,10 +406,14 @@ class ToolExecutor:
             )
             if error_exc:
                 message.error = str(error_exc)
-            elif (result and result.error_code and result.error_code > 0) or (result and result.error_message):
-                message.error = f"Error (code {result.error_code}): {result.error_message}"
-            elif result and result.content:
-                message.output = interleaved_content_as_str(result.content)
+            elif (result and (error_code := getattr(result, "error_code", None)) and error_code > 0) or (
+                result and getattr(result, "error_message", None)
+            ):
+                ec = getattr(result, "error_code", "unknown")
+                em = getattr(result, "error_message", "")
+                message.error = f"Error (code {ec}): {em}"
+            elif result and (content := getattr(result, "content", None)):
+                message.output = interleaved_content_as_str(content)
         else:
             if function.name == "web_search":
                 message = OpenAIResponseOutputMessageWebSearchToolCall(
@@ -401,17 +428,17 @@ class ToolExecutor:
                     queries=[tool_kwargs.get("query", "")],
                     status="completed",
                 )
-                if result and "document_ids" in result.metadata:
+                if result and (metadata := getattr(result, "metadata", None)) and "document_ids" in metadata:
                     message.results = []
-                    for i, doc_id in enumerate(result.metadata["document_ids"]):
-                        text = result.metadata["chunks"][i] if "chunks" in result.metadata else None
-                        score = result.metadata["scores"][i] if "scores" in result.metadata else None
+                    for i, doc_id in enumerate(metadata["document_ids"]):
+                        text = metadata["chunks"][i] if "chunks" in metadata else None
+                        score = metadata["scores"][i] if "scores" in metadata else None
                         message.results.append(
                             OpenAIResponseOutputMessageFileSearchToolCallResults(
                                 file_id=doc_id,
                                 filename=doc_id,
-                                text=text,
-                                score=score,
+                                text=text if text is not None else "",
+                                score=score if score is not None else 0.0,
                                 attributes={},
                             )
                         )
@@ -421,27 +448,32 @@ class ToolExecutor:
                 raise ValueError(f"Unknown tool {function.name} called")

         # Build input message
-        input_message = None
-        if result and result.content:
-            if isinstance(result.content, str):
-                content = result.content
-            elif isinstance(result.content, list):
-                content = []
-                for item in result.content:
+        input_message: OpenAIToolMessageParam | None = None
+        if result and (result_content := getattr(result, "content", None)):
+            # all the mypy contortions here are still unsatisfactory with random Any typing
+            if isinstance(result_content, str):
+                msg_content: str | list[Any] = result_content
+            elif isinstance(result_content, list):
+                content_list: list[Any] = []
+                for item in result_content:
+                    part: Any
                     if isinstance(item, TextContentItem):
                         part = OpenAIChatCompletionContentPartTextParam(text=item.text)
                     elif isinstance(item, ImageContentItem):
                         if item.image.data:
-                            url = f"data:image;base64,{item.image.data}"
+                            url_value = f"data:image;base64,{item.image.data}"
                         else:
-                            url = item.image.url
-                        part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url))
+                            url_value = str(item.image.url) if item.image.url else ""
+                        part = OpenAIChatCompletionContentPartImageParam(image_url=OpenAIImageURL(url=url_value))
                     else:
                         raise ValueError(f"Unknown result content type: {type(item)}")
-                    content.append(part)
+                    content_list.append(part)
+                msg_content = content_list
             else:
-                raise ValueError(f"Unknown result content type: {type(result.content)}")
-            input_message = OpenAIToolMessageParam(content=content, tool_call_id=tool_call_id)
+                raise ValueError(f"Unknown result content type: {type(result_content)}")
+            # OpenAIToolMessageParam accepts str | list[TextParam] but we may have images
+            # This is runtime-safe as the API accepts it, but mypy complains
+            input_message = OpenAIToolMessageParam(content=msg_content, tool_call_id=tool_call_id)  # type: ignore[arg-type]
         else:
             text = str(error_exc) if error_exc else "Tool execution failed"
             input_message = OpenAIToolMessageParam(content=text, tool_call_id=tool_call_id)
@@ -5,6 +5,7 @@
 # the root directory of this source tree.

 from dataclasses import dataclass
+from typing import cast

 from openai.types.chat import ChatCompletionToolParam
 from pydantic import BaseModel
@@ -100,17 +101,19 @@ class ToolContext(BaseModel):
                 if isinstance(tool, OpenAIResponseToolMCP):
                     previous_tools_by_label[tool.server_label] = tool
             # collect tool definitions which are the same in current and previous requests:
-            tools_to_process = []
+            tools_to_process: list[OpenAIResponseInputTool] = []
             matched: dict[str, OpenAIResponseInputToolMCP] = {}
-            for tool in self.current_tools:
+            # Mypy confuses OpenAIResponseInputTool (Input union) with OpenAIResponseTool (output union)
+            # which differ only in MCP type (InputToolMCP vs ToolMCP). Code is correct.
+            for tool in cast(list[OpenAIResponseInputTool], self.current_tools):  # type: ignore[assignment]
                 if isinstance(tool, OpenAIResponseInputToolMCP) and tool.server_label in previous_tools_by_label:
                     previous_tool = previous_tools_by_label[tool.server_label]
                     if previous_tool.allowed_tools == tool.allowed_tools:
                         matched[tool.server_label] = tool
                     else:
-                        tools_to_process.append(tool)
+                        tools_to_process.append(tool)  # type: ignore[arg-type]
                 else:
-                    tools_to_process.append(tool)
+                    tools_to_process.append(tool)  # type: ignore[arg-type]
             # tools that are not the same or were not previously defined need to be processed:
             self.tools_to_process = tools_to_process
             # for all matched definitions, get the mcp_list_tools objects from the previous output:
@@ -119,9 +122,11 @@ class ToolContext(BaseModel):
             ]
             # reconstruct the tool to server mappings that can be reused:
             for listing in self.previous_tool_listings:
+                # listing is OpenAIResponseOutputMessageMCPListTools which has tools: list[MCPListToolsTool]
                 definition = matched[listing.server_label]
-                for tool in listing.tools:
-                    self.previous_tools[tool.name] = definition
+                for mcp_tool in listing.tools:
+                    # mcp_tool is MCPListToolsTool which has a name: str field
+                    self.previous_tools[mcp_tool.name] = definition

     def available_tools(self) -> list[OpenAIResponseTool]:
         if not self.current_tools:
@@ -139,6 +144,8 @@ class ToolContext(BaseModel):
                     server_label=tool.server_label,
                     allowed_tools=tool.allowed_tools,
                 )
+            # Exhaustive check - all tool types should be handled above
+            raise AssertionError(f"Unexpected tool type: {type(tool)}")
         return [convert_tool(tool) for tool in self.current_tools]
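The trailing `raise AssertionError` turns `convert_tool` into an exhaustiveness check: if a new member is added to the tool union and not handled, the failure is loud instead of a silent implicit `return None`. The idiom in isolation, with placeholder classes:

    class SearchTool: ...
    class McpTool: ...

    def describe(tool: SearchTool | McpTool) -> str:
        if isinstance(tool, SearchTool):
            return "search"
        if isinstance(tool, McpTool):
            return "mcp"
        # Unreachable while every union member is handled above;
        # fails loudly if a new member is added later.
        raise AssertionError(f"Unexpected tool type: {type(tool)}")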
@@ -7,6 +7,7 @@
 import asyncio
 import re
 import uuid
+from collections.abc import Sequence

 from llama_stack.apis.agents.agents import ResponseGuardrailSpec
 from llama_stack.apis.agents.openai_responses import (
@@ -71,14 +72,14 @@ async def convert_chat_choice_to_response_message(
     return OpenAIResponseMessage(
         id=message_id or f"msg_{uuid.uuid4()}",
-        content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=annotations)],
+        content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=list(annotations))],
         status="completed",
         role="assistant",
     )


 async def convert_response_content_to_chat_content(
-    content: (str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]),
+    content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent],
 ) -> str | list[OpenAIChatCompletionContentPartParam]:
     """
     Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
@@ -88,7 +89,8 @@ async def convert_response_content_to_chat_content(
     if isinstance(content, str):
         return content

-    converted_parts = []
+    # Type with union to avoid list invariance issues
+    converted_parts: list[OpenAIChatCompletionContentPartParam] = []
     for content_part in content:
         if isinstance(content_part, OpenAIResponseInputMessageContentText):
             converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
@@ -158,9 +160,11 @@ async def convert_response_input_to_chat_messages(
                 ),
             )
             messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call]))
+            # Output can be None, use empty string as fallback
+            output_content = input_item.output if input_item.output is not None else ""
             messages.append(
                 OpenAIToolMessageParam(
-                    content=input_item.output,
+                    content=output_content,
                     tool_call_id=input_item.id,
                 )
             )
@@ -172,7 +176,8 @@ async def convert_response_input_to_chat_messages(
         ):
             # these are handled by the responses impl itself and not pass through to chat completions
             pass
-        else:
+        elif isinstance(input_item, OpenAIResponseMessage):
+            # Narrow type to OpenAIResponseMessage which has content and role attributes
             content = await convert_response_content_to_chat_content(input_item.content)
             message_type = await get_message_type_by_role(input_item.role)
             if message_type is None:
@@ -191,7 +196,8 @@ async def convert_response_input_to_chat_messages(
                     last_user_content = getattr(last_user_msg, "content", None)
                     if last_user_content == content:
                         continue  # Skip duplicate user message
-            messages.append(message_type(content=content))
+            # Dynamic message type call - different message types have different content expectations
+            messages.append(message_type(content=content))  # type: ignore[call-arg,arg-type]
     if len(tool_call_results):
         # Check if unpaired function_call_outputs reference function_calls from previous messages
         if previous_messages:
@@ -237,8 +243,11 @@ async def convert_response_text_to_chat_response_format(
     if text.format["type"] == "json_object":
         return OpenAIResponseFormatJSONObject()
     if text.format["type"] == "json_schema":
+        # Assert name exists for json_schema format
+        assert text.format.get("name"), "json_schema format requires a name"
+        schema_name: str = text.format["name"]  # type: ignore[assignment]
         return OpenAIResponseFormatJSONSchema(
-            json_schema=OpenAIJSONSchema(name=text.format["name"], schema=text.format["schema"])
+            json_schema=OpenAIJSONSchema(name=schema_name, schema=text.format["schema"])
         )
     raise ValueError(f"Unsupported text format: {text.format}")
@@ -251,7 +260,7 @@ async def get_message_type_by_role(role: str) -> type[OpenAIMessageParam] | None
         "assistant": OpenAIAssistantMessageParam,
         "developer": OpenAIDeveloperMessageParam,
     }
-    return role_to_type.get(role)
+    return role_to_type.get(role)  # type: ignore[return-value]  # Pydantic models use ModelMetaclass


 def _extract_citations_from_text(
@@ -320,7 +329,8 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
     # Look up shields to get their provider_resource_id (actual model ID)
     model_ids = []
-    shields_list = await safety_api.routing_table.list_shields()
+    # TODO: list_shields not in Safety interface but available at runtime via API routing
+    shields_list = await safety_api.routing_table.list_shields()  # type: ignore[attr-defined]

     for guardrail_id in guardrail_ids:
         matching_shields = [shield for shield in shields_list.data if shield.identifier == guardrail_id]
@@ -337,7 +347,9 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
         for result in response.results:
             if result.flagged:
                 message = result.user_message or "Content blocked by safety guardrails"
-                flagged_categories = [cat for cat, flagged in result.categories.items() if flagged]
+                flagged_categories = (
+                    [cat for cat, flagged in result.categories.items() if flagged] if result.categories else []
+                )
                 violation_type = result.metadata.get("violation_type", []) if result.metadata else []

                 if flagged_categories:
@@ -347,6 +359,9 @@ async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[
                 return message

+    # No violations found
+    return None


 def extract_guardrail_ids(guardrails: list | None) -> list[str]:
     """Extract guardrail IDs from guardrails parameter, handling both string IDs and ResponseGuardrailSpec objects."""
@@ -6,7 +6,7 @@
 import asyncio

-from llama_stack.apis.inference import Message
+from llama_stack.apis.inference import OpenAIMessageParam
 from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel
 from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
@@ -31,7 +31,7 @@ class ShieldRunnerMixin:
         self.input_shields = input_shields
         self.output_shields = output_shields

-    async def run_multiple_shields(self, messages: list[Message], identifiers: list[str]) -> None:
+    async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None:
         async def run_shield_with_span(identifier: str):
             async with tracing.span(f"run_shield_{identifier}"):
                 return await self.safety_api.run_shield(
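`run_multiple_shields` wraps each shield call in its own tracing span via `run_shield_with_span`; a common way to fan such per-shield coroutines out concurrently is `asyncio.gather`, sketched here with a placeholder check function (an assumed shape, not the mixin's exact code):

    import asyncio

    async def run_check(identifier: str) -> str:
        # placeholder for one run_shield call wrapped in a tracing span
        await asyncio.sleep(0)
        return identifier

    async def run_all(identifiers: list[str]) -> list[str]:
        # one task per shield; gather returns results in input order
        return await asyncio.gather(*(run_check(i) for i in identifiers))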
@@ -33,4 +33,5 @@ class AnthropicInferenceAdapter(OpenAIMixin):
         return "https://api.anthropic.com/v1"

     async def list_provider_model_ids(self) -> Iterable[str]:
-        return [m.id async for m in AsyncAnthropic(api_key=self.get_api_key()).models.list()]
+        api_key = self._get_api_key_from_config_or_provider_data()
+        return [m.id async for m in AsyncAnthropic(api_key=api_key).models.list()]
@@ -33,10 +33,11 @@ class DatabricksInferenceAdapter(OpenAIMixin):

     async def list_provider_model_ids(self) -> Iterable[str]:
         # Filter out None values from endpoint names
+        api_token = self._get_api_key_from_config_or_provider_data()
         return [
             endpoint.name  # type: ignore[misc]
             for endpoint in WorkspaceClient(
-                host=self.config.url, token=self.get_api_key()
+                host=self.config.url, token=api_token
             ).serving_endpoints.list()  # TODO: this is not async
         ]
@@ -128,7 +128,9 @@ class LiteLLMOpenAIMixin(
         return schema

     async def _get_params(self, request: ChatCompletionRequest) -> dict:
-        input_dict = {}
+        from typing import Any
+
+        input_dict: dict[str, Any] = {}

         input_dict["messages"] = [
             await convert_message_to_openai_dict_new(m, download_images=self.download_images) for m in request.messages
@@ -139,30 +141,27 @@ class LiteLLMOpenAIMixin(
                     f"Unsupported response format: {type(fmt)}. Only JsonSchemaResponseFormat is supported."
                 )

-            fmt = fmt.json_schema
-            name = fmt["title"]
-            del fmt["title"]
-            fmt["additionalProperties"] = False
+            # Convert to dict for manipulation
+            fmt_dict = dict(fmt.json_schema)
+            name = fmt_dict["title"]
+            del fmt_dict["title"]
+            fmt_dict["additionalProperties"] = False

             # Apply additionalProperties: False recursively to all objects
-            fmt = self._add_additional_properties_recursive(fmt)
+            fmt_dict = self._add_additional_properties_recursive(fmt_dict)

             input_dict["response_format"] = {
                 "type": "json_schema",
                 "json_schema": {
                     "name": name,
-                    "schema": fmt,
+                    "schema": fmt_dict,
                     "strict": self.json_schema_strict,
                 },
             }
         if request.tools:
             input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools]
-            if request.tool_config.tool_choice:
-                input_dict["tool_choice"] = (
-                    request.tool_config.tool_choice.value
-                    if isinstance(request.tool_config.tool_choice, ToolChoice)
-                    else request.tool_config.tool_choice
-                )
+            if request.tool_config and (tool_choice := request.tool_config.tool_choice):
+                input_dict["tool_choice"] = tool_choice.value if isinstance(tool_choice, ToolChoice) else tool_choice

         return {
             "model": request.model,
@@ -176,10 +175,10 @@ class LiteLLMOpenAIMixin(
     def get_api_key(self) -> str:
         provider_data = self.get_request_provider_data()
         key_field = self.provider_data_api_key_field
-        if provider_data and getattr(provider_data, key_field, None):
-            api_key = getattr(provider_data, key_field)
-        else:
-            api_key = self.api_key_from_config
+        if provider_data and key_field and (api_key := getattr(provider_data, key_field, None)):
+            return str(api_key)  # type: ignore[no-any-return]  # getattr returns Any, can't narrow without runtime type inspection
+
+        api_key = self.api_key_from_config
         if not api_key:
             raise ValueError(
                 "API key is not set. Please provide a valid API key in the "
@@ -192,7 +191,13 @@ class LiteLLMOpenAIMixin(
         self,
         params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id

         # Convert input to list if it's a string
         input_list = [params.input] if isinstance(params.input, str) else params.input
@@ -200,7 +205,7 @@ class LiteLLMOpenAIMixin(
         # Call litellm embedding function
         # litellm.drop_params = True
         response = litellm.embedding(
-            model=self.get_litellm_model_name(model_obj.provider_resource_id),
+            model=self.get_litellm_model_name(provider_resource_id),
            input=input_list,
             api_key=self.get_api_key(),
             api_base=self.api_base,
@@ -217,7 +222,7 @@ class LiteLLMOpenAIMixin(

         return OpenAIEmbeddingsResponse(
             data=data,
-            model=model_obj.provider_resource_id,
+            model=provider_resource_id,
             usage=usage,
         )

@@ -225,10 +230,16 @@ class LiteLLMOpenAIMixin(
         self,
         params: OpenAICompletionRequestWithExtraBody,
     ) -> OpenAICompletion:
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id

         request_params = await prepare_openai_completion_params(
-            model=self.get_litellm_model_name(model_obj.provider_resource_id),
+            model=self.get_litellm_model_name(provider_resource_id),
             prompt=params.prompt,
             best_of=params.best_of,
             echo=params.echo,
@@ -249,7 +260,8 @@ class LiteLLMOpenAIMixin(
             api_key=self.get_api_key(),
             api_base=self.api_base,
         )
-        return await litellm.atext_completion(**request_params)
+        # LiteLLM returns compatible type but mypy can't verify external library
+        return await litellm.atext_completion(**request_params)  # type: ignore[no-any-return]  # external lib lacks type stubs

     async def openai_chat_completion(
         self,
@@ -265,10 +277,16 @@ class LiteLLMOpenAIMixin(
             elif "include_usage" not in stream_options:
                 stream_options = {**stream_options, "include_usage": True}

+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id

         request_params = await prepare_openai_completion_params(
-            model=self.get_litellm_model_name(model_obj.provider_resource_id),
+            model=self.get_litellm_model_name(provider_resource_id),
             messages=params.messages,
             frequency_penalty=params.frequency_penalty,
             function_call=params.function_call,
@@ -294,7 +312,8 @@ class LiteLLMOpenAIMixin(
             api_key=self.get_api_key(),
             api_base=self.api_base,
         )
-        return await litellm.acompletion(**request_params)
+        # LiteLLM returns compatible type but mypy can't verify external library
+        return await litellm.acompletion(**request_params)  # type: ignore[no-any-return]  # external lib lacks type stubs

     async def check_model_availability(self, model: str) -> bool:
         """
@@ -161,8 +161,10 @@ def get_sampling_strategy_options(params: SamplingParams) -> dict:
     if isinstance(params.strategy, GreedySamplingStrategy):
         options["temperature"] = 0.0
     elif isinstance(params.strategy, TopPSamplingStrategy):
-        options["temperature"] = params.strategy.temperature
-        options["top_p"] = params.strategy.top_p
+        if params.strategy.temperature is not None:
+            options["temperature"] = params.strategy.temperature
+        if params.strategy.top_p is not None:
+            options["top_p"] = params.strategy.top_p
     elif isinstance(params.strategy, TopKSamplingStrategy):
         options["top_k"] = params.strategy.top_k
     else:
@@ -192,12 +194,12 @@ def get_sampling_options(params: SamplingParams | None) -> dict:

 def text_from_choice(choice) -> str:
     if hasattr(choice, "delta") and choice.delta:
-        return choice.delta.content
+        return choice.delta.content  # type: ignore[no-any-return]  # external OpenAI types lack precise annotations

     if hasattr(choice, "message"):
-        return choice.message.content
+        return choice.message.content  # type: ignore[no-any-return]  # external OpenAI types lack precise annotations

-    return choice.text
+    return choice.text  # type: ignore[no-any-return]  # external OpenAI types lack precise annotations


 def get_stop_reason(finish_reason: str) -> StopReason:
@@ -216,7 +218,7 @@ def convert_openai_completion_logprobs(
 ) -> list[TokenLogProbs] | None:
     if not logprobs:
         return None
-    if hasattr(logprobs, "top_logprobs"):
+    if hasattr(logprobs, "top_logprobs") and logprobs.top_logprobs:
         return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs]

     # Together supports logprobs with top_k=1 only. This means for each token position,
@@ -236,7 +238,7 @@ def convert_openai_completion_logprobs_stream(text: str, logprobs: float | OpenA
     if isinstance(logprobs, float):
         # Adapt response from Together CompletionChoicesChunk
         return [TokenLogProbs(logprobs_by_token={text: logprobs})]
-    if hasattr(logprobs, "top_logprobs"):
+    if hasattr(logprobs, "top_logprobs") and logprobs.top_logprobs:
         return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs]
     return None

@@ -245,23 +247,24 @@ def process_completion_response(
     response: OpenAICompatCompletionResponse,
 ) -> CompletionResponse:
     choice = response.choices[0]
+    text = choice.text or ""
     # drop suffix <eot_id> if present and return stop reason as end of turn
-    if choice.text.endswith("<|eot_id|>"):
+    if text.endswith("<|eot_id|>"):
         return CompletionResponse(
             stop_reason=StopReason.end_of_turn,
-            content=choice.text[: -len("<|eot_id|>")],
+            content=text[: -len("<|eot_id|>")],
             logprobs=convert_openai_completion_logprobs(choice.logprobs),
         )
     # drop suffix <eom_id> if present and return stop reason as end of message
-    if choice.text.endswith("<|eom_id|>"):
+    if text.endswith("<|eom_id|>"):
         return CompletionResponse(
             stop_reason=StopReason.end_of_message,
-            content=choice.text[: -len("<|eom_id|>")],
+            content=text[: -len("<|eom_id|>")],
             logprobs=convert_openai_completion_logprobs(choice.logprobs),
         )
     return CompletionResponse(
-        stop_reason=get_stop_reason(choice.finish_reason),
-        content=choice.text,
+        stop_reason=get_stop_reason(choice.finish_reason or "stop"),
+        content=text,
         logprobs=convert_openai_completion_logprobs(choice.logprobs),
     )
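`process_completion_response` now defaults the Optional fields once up front (`text = choice.text or ""`, `finish_reason or "stop"`) and works with concrete strings afterwards. The normalize-once shape, reduced to a standalone helper:

    def strip_sentinel(text: str | None) -> str:
        safe_text = text or ""  # default the Optional once, then use plain str
        for sentinel in ("<|eot_id|>", "<|eom_id|>"):
            if safe_text.endswith(sentinel):
                return safe_text[: -len(sentinel)]
        return safe_text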
@@ -272,10 +275,10 @@ def process_chat_completion_response(
 ) -> ChatCompletionResponse:
     choice = response.choices[0]
     if choice.finish_reason == "tool_calls":
-        if not choice.message or not choice.message.tool_calls:
+        if not hasattr(choice, "message") or not choice.message or not choice.message.tool_calls:  # type: ignore[attr-defined]  # OpenAICompatCompletionChoice is runtime duck-typed
             raise ValueError("Tool calls are not present in the response")

-        tool_calls = [convert_tool_call(tool_call) for tool_call in choice.message.tool_calls]
+        tool_calls = [convert_tool_call(tool_call) for tool_call in choice.message.tool_calls]  # type: ignore[attr-defined]  # OpenAICompatCompletionChoice is runtime duck-typed
         if any(isinstance(tool_call, UnparseableToolCall) for tool_call in tool_calls):
             # If we couldn't parse a tool call, jsonify the tool calls and return them
             return ChatCompletionResponse(
@@ -287,9 +290,11 @@ def process_chat_completion_response(
             )
         else:
             # Otherwise, return tool calls as normal
+            # Filter to only valid ToolCall objects
+            valid_tool_calls = [tc for tc in tool_calls if isinstance(tc, ToolCall)]
             return ChatCompletionResponse(
                 completion_message=CompletionMessage(
-                    tool_calls=tool_calls,
+                    tool_calls=valid_tool_calls,
                     stop_reason=StopReason.end_of_turn,
                     # Content is not optional
                     content="",
@@ -299,7 +304,7 @@ def process_chat_completion_response(

     # TODO: This does not work well with tool calls for vLLM remote provider
     # Ref: https://github.com/meta-llama/llama-stack/issues/1058
-    raw_message = decode_assistant_message(text_from_choice(choice), get_stop_reason(choice.finish_reason))
+    raw_message = decode_assistant_message(text_from_choice(choice), get_stop_reason(choice.finish_reason or "stop"))

     # NOTE: If we do not set tools in chat-completion request, we should not
     # expect the ToolCall in the response. Instead, we should return the raw
@@ -324,8 +329,8 @@ def process_chat_completion_response(
     return ChatCompletionResponse(
         completion_message=CompletionMessage(
-            content=raw_message.content,
-            stop_reason=raw_message.stop_reason,
+            content=raw_message.content,  # type: ignore[arg-type]  # decode_assistant_message returns Union[str, InterleavedContent]
+            stop_reason=raw_message.stop_reason or StopReason.end_of_turn,
             tool_calls=raw_message.tool_calls,
         ),
         logprobs=None,
@@ -448,7 +453,7 @@ async def process_chat_completion_stream_response(
         )

     # parse tool calls and report errors
-    message = decode_assistant_message(buffer, stop_reason)
+    message = decode_assistant_message(buffer, stop_reason or StopReason.end_of_turn)
     parsed_tool_calls = len(message.tool_calls) > 0

     if ipython and not parsed_tool_calls:
@@ -463,7 +468,7 @@ async def process_chat_completion_stream_response(
             )
         )

-    request_tools = {t.tool_name: t for t in request.tools}
+    request_tools = {t.tool_name: t for t in (request.tools or [])}
     for tool_call in message.tool_calls:
         if tool_call.tool_name in request_tools:
             yield ChatCompletionResponseStreamChunk(
@@ -525,7 +530,7 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals
     }
     if hasattr(message, "tool_calls") and message.tool_calls:
-        result["tool_calls"] = []
+        tool_calls_list = []
         for tc in message.tool_calls:
             # The tool.tool_name can be a str or a BuiltinTool enum. If
             # it's the latter, convert to a string.
@@ -533,7 +538,7 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals
             if isinstance(tool_name, BuiltinTool):
                 tool_name = tool_name.value

-            result["tool_calls"].append(
+            tool_calls_list.append(
                 {
                     "id": tc.call_id,
                     "type": "function",
@@ -543,6 +548,7 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals
                     },
                 }
             )
+        result["tool_calls"] = tool_calls_list  # type: ignore[assignment]  # dict allows Any value, stricter type expected

     return result
@@ -608,7 +614,7 @@ async def convert_message_to_openai_dict_new(
                 ),
             )
         elif isinstance(content_, list):
-            return [await impl(item) for item in content_]
+            return [await impl(item) for item in content_]  # type: ignore[misc]  # recursive list comprehension confuses mypy's type narrowing
         else:
             raise ValueError(f"Unsupported content type: {type(content_)}")

@@ -620,7 +626,7 @@ async def convert_message_to_openai_dict_new(
         else:
             return [ret]

-    out: OpenAIChatCompletionMessage = None
+    out: OpenAIChatCompletionMessage
     if isinstance(message, UserMessage):
         out = OpenAIChatCompletionUserMessage(
             role="user",
@@ -636,7 +642,7 @@ async def convert_message_to_openai_dict_new(
                 ),
                 type="function",
             )
-            for tool in message.tool_calls
+            for tool in (message.tool_calls or [])
         ]
         params = {}
         if tool_calls:
@@ -644,18 +650,18 @@ async def convert_message_to_openai_dict_new(
         out = OpenAIChatCompletionAssistantMessage(
             role="assistant",
             content=await _convert_message_content(message.content),
-            **params,
+            **params,  # type: ignore[typeddict-item]  # tool_calls dict expansion conflicts with TypedDict optional field
         )
     elif isinstance(message, ToolResponseMessage):
         out = OpenAIChatCompletionToolMessage(
             role="tool",
             tool_call_id=message.call_id,
-            content=await _convert_message_content(message.content),
+            content=await _convert_message_content(message.content),  # type: ignore[typeddict-item]  # content union type incompatible with TypedDict str requirement
         )
     elif isinstance(message, SystemMessage):
         out = OpenAIChatCompletionSystemMessage(
             role="system",
-            content=await _convert_message_content(message.content),
+            content=await _convert_message_content(message.content),  # type: ignore[typeddict-item]  # content union type incompatible with TypedDict str requirement
         )
     else:
         raise ValueError(f"Unsupported message type: {type(message)}")
@@ -758,16 +764,16 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
     function = out["function"]

     if isinstance(tool.tool_name, BuiltinTool):
-        function["name"] = tool.tool_name.value
+        function["name"] = tool.tool_name.value  # type: ignore[index]  # dict value inferred as Any but mypy sees Collection[str]
     else:
-        function["name"] = tool.tool_name
+        function["name"] = tool.tool_name  # type: ignore[index]  # dict value inferred as Any but mypy sees Collection[str]

     if tool.description:
-        function["description"] = tool.description
+        function["description"] = tool.description  # type: ignore[index]  # dict value inferred as Any but mypy sees Collection[str]

     if tool.input_schema:
         # Pass through the entire JSON Schema as-is
-        function["parameters"] = tool.input_schema
+        function["parameters"] = tool.input_schema  # type: ignore[index]  # dict value inferred as Any but mypy sees Collection[str]

     # NOTE: OpenAI does not support output_schema, so we drop it here
     # It's stored in LlamaStack for validation and other provider usage
@@ -815,15 +821,15 @@ def _convert_openai_request_tool_config(tool_choice: str | dict[str, Any] | None
     tool_config = ToolConfig()
     if tool_choice:
         try:
-            tool_choice = ToolChoice(tool_choice)
+            tool_choice = ToolChoice(tool_choice)  # type: ignore[assignment]  # reassigning to enum narrows union but mypy can't track after exception
         except ValueError:
             pass
-        tool_config.tool_choice = tool_choice
+        tool_config.tool_choice = tool_choice  # type: ignore[assignment]  # ToolConfig.tool_choice accepts Union[ToolChoice, dict] but mypy tracks narrower type
     return tool_config


 def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) -> list[ToolDefinition]:
-    lls_tools = []
+    lls_tools: list[ToolDefinition] = []
     if not tools:
         return lls_tools
@@ -843,16 +849,16 @@ def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) ->

 def _convert_openai_request_response_format(
-    response_format: OpenAIResponseFormatParam = None,
+    response_format: OpenAIResponseFormatParam | None = None,
 ):
     if not response_format:
         return None
     # response_format can be a dict or a pydantic model
-    response_format = dict(response_format)
-    if response_format.get("type", "") == "json_schema":
+    response_format_dict = dict(response_format)  # type: ignore[arg-type]  # OpenAIResponseFormatParam union needs dict conversion
+    if response_format_dict.get("type", "") == "json_schema":
         return JsonSchemaResponseFormat(
-            type="json_schema",
-            json_schema=response_format.get("json_schema", {}).get("schema", ""),
+            type="json_schema",  # type: ignore[arg-type]  # Literal["json_schema"] incompatible with expected type
+            json_schema=response_format_dict.get("json_schema", {}).get("schema", ""),
         )
     return None
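`_convert_openai_request_response_format` now copies the incoming union into `response_format_dict` instead of reassigning the parameter, so the parameter's declared type stays stable and the dict operations are checked against a concrete `dict`. Reduced to a standalone sketch with a generic mapping parameter:

    from collections.abc import Mapping
    from typing import Any

    def extract_json_schema(response_format: Mapping[str, Any] | None) -> Any:
        if not response_format:
            return None
        # Copy under a new name rather than reassigning the parameter,
        # so its declared type never changes mid-function.
        response_format_dict = dict(response_format)
        if response_format_dict.get("type", "") == "json_schema":
            return response_format_dict.get("json_schema", {}).get("schema", "")
        return None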
@@ -938,16 +944,15 @@ def _convert_openai_sampling_params(
     # Map an explicit temperature of 0 to greedy sampling
     if temperature == 0:
-        strategy = GreedySamplingStrategy()
+        sampling_params.strategy = GreedySamplingStrategy()
     else:
         # OpenAI defaults to 1.0 for temperature and top_p if unset
         if temperature is None:
             temperature = 1.0
         if top_p is None:
             top_p = 1.0
-        strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p)
+        sampling_params.strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p)  # type: ignore[assignment]  # SamplingParams.strategy union accepts this type

-    sampling_params.strategy = strategy
     return sampling_params
@@ -957,23 +962,24 @@ def openai_messages_to_messages(
     """
     Convert a list of OpenAIChatCompletionMessage into a list of Message.
     """
-    converted_messages = []
+    converted_messages: list[Message] = []
     for message in messages:
+        converted_message: Message
         if message.role == "system":
-            converted_message = SystemMessage(content=openai_content_to_content(message.content))
+            converted_message = SystemMessage(content=openai_content_to_content(message.content))  # type: ignore[arg-type]  # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
         elif message.role == "user":
-            converted_message = UserMessage(content=openai_content_to_content(message.content))
+            converted_message = UserMessage(content=openai_content_to_content(message.content))  # type: ignore[arg-type]  # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
         elif message.role == "assistant":
             converted_message = CompletionMessage(
-                content=openai_content_to_content(message.content),
-                tool_calls=_convert_openai_tool_calls(message.tool_calls),
+                content=openai_content_to_content(message.content),  # type: ignore[arg-type]  # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
+                tool_calls=_convert_openai_tool_calls(message.tool_calls) if message.tool_calls else [],  # type: ignore[arg-type]  # OpenAI tool_calls type incompatible with conversion function
                 stop_reason=StopReason.end_of_turn,
             )
         elif message.role == "tool":
             converted_message = ToolResponseMessage(
                 role="tool",
                 call_id=message.tool_call_id,
-                content=openai_content_to_content(message.content),
+                content=openai_content_to_content(message.content),  # type: ignore[arg-type]  # OpenAI SDK uses aliased types internally that mypy sees as incompatible with base types
             )
         else:
             raise ValueError(f"Unknown role {message.role}")
@@ -990,9 +996,9 @@ def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionConten
         return [openai_content_to_content(c) for c in content]
     elif hasattr(content, "type"):
         if content.type == "text":
-            return TextContentItem(type="text", text=content.text)
+            return TextContentItem(type="text", text=content.text)  # type: ignore[attr-defined]  # Iterable narrowed by hasattr check but mypy doesn't track
         elif content.type == "image_url":
-            return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url)))
+            return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url)))  # type: ignore[attr-defined]  # Iterable narrowed by hasattr check but mypy doesn't track
         else:
             raise ValueError(f"Unknown content type: {content.type}")
     else:
@@ -1041,9 +1047,9 @@ def convert_openai_chat_completion_choice(
         completion_message=CompletionMessage(
             content=choice.message.content or "",  # CompletionMessage content is not optional
             stop_reason=_convert_openai_finish_reason(choice.finish_reason),
-            tool_calls=_convert_openai_tool_calls(choice.message.tool_calls),
+            tool_calls=_convert_openai_tool_calls(choice.message.tool_calls) if choice.message.tool_calls else [],  # type: ignore[arg-type]  # OpenAI tool_calls Optional type broadens union
         ),
-        logprobs=_convert_openai_logprobs(getattr(choice, "logprobs", None)),
+        logprobs=_convert_openai_logprobs(getattr(choice, "logprobs", None)),  # type: ignore[arg-type]  # getattr returns Any, can't narrow without inspection
     )
@@ -1070,7 +1076,7 @@ async def convert_openai_chat_completion_stream(
         choice = chunk.choices[0]  # assuming only one choice per chunk

         # we assume there's only one finish_reason in the stream
-        stop_reason = _convert_openai_finish_reason(choice.finish_reason) or stop_reason
+        stop_reason = _convert_openai_finish_reason(choice.finish_reason) if choice.finish_reason else stop_reason
         logprobs = getattr(choice, "logprobs", None)

         # if there's a tool call, emit an event for each tool in the list
@@ -1083,7 +1089,7 @@ async def convert_openai_chat_completion_stream(
                     event=ChatCompletionResponseEvent(
                         event_type=event_type,
                         delta=TextDelta(text=choice.delta.content),
-                        logprobs=_convert_openai_logprobs(logprobs),
+                        logprobs=_convert_openai_logprobs(logprobs),  # type: ignore[arg-type]  # logprobs type broadened from getattr result
                     )
                 )
@@ -1101,10 +1107,10 @@ async def convert_openai_chat_completion_stream(
                         event=ChatCompletionResponseEvent(
                             event_type=event_type,
                             delta=ToolCallDelta(
-                                tool_call=_convert_openai_tool_calls([tool_call])[0],
+                                tool_call=_convert_openai_tool_calls([tool_call])[0],  # type: ignore[arg-type, list-item]  # delta tool_call type differs from complete tool_call
                                 parse_status=ToolCallParseStatus.succeeded,
                             ),
-                            logprobs=_convert_openai_logprobs(logprobs),
+                            logprobs=_convert_openai_logprobs(logprobs),  # type: ignore[arg-type]  # logprobs type broadened from getattr result
                         )
                     )
                 else:
@@ -1125,12 +1131,15 @@ async def convert_openai_chat_completion_stream(
                     if tool_call.function.name:
                         buffer["name"] = tool_call.function.name
                         delta = f"{buffer['name']}("
-                        buffer["content"] += delta
+                        if buffer["content"] is not None:
+                            buffer["content"] += delta

                     if tool_call.function.arguments:
                         delta = tool_call.function.arguments
-                        buffer["arguments"] += delta
-                        buffer["content"] += delta
+                        if buffer["arguments"] is not None and delta:
+                            buffer["arguments"] += delta
+                        if buffer["content"] is not None and delta:
+                            buffer["content"] += delta

                     yield ChatCompletionResponseStreamChunk(
                         event=ChatCompletionResponseEvent(
@@ -1139,7 +1148,7 @@ async def convert_openai_chat_completion_stream(
                                 tool_call=delta,
                                 parse_status=ToolCallParseStatus.in_progress,
                             ),
-                            logprobs=_convert_openai_logprobs(logprobs),
+                            logprobs=_convert_openai_logprobs(logprobs),  # type: ignore[arg-type]  # logprobs type broadened from getattr result
                         )
                     )
             elif choice.delta.content:
@@ -1147,7 +1156,7 @@ async def convert_openai_chat_completion_stream(
                     event=ChatCompletionResponseEvent(
                         event_type=event_type,
                         delta=TextDelta(text=choice.delta.content or ""),
-                        logprobs=_convert_openai_logprobs(logprobs),
+                        logprobs=_convert_openai_logprobs(logprobs),  # type: ignore[arg-type]  # logprobs type broadened from getattr result
                     )
                 )

@@ -1155,7 +1164,8 @@ async def convert_openai_chat_completion_stream(
         logger.debug(f"toolcall_buffer[{idx}]: {buffer}")
         if buffer["name"]:
             delta = ")"
-            buffer["content"] += delta
+            if buffer["content"] is not None:
+                buffer["content"] += delta
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
                     event_type=event_type,
@@ -1168,16 +1178,16 @@ async def convert_openai_chat_completion_stream(
             )

         try:
-            tool_call = ToolCall(
-                call_id=buffer["call_id"],
-                tool_name=buffer["name"],
-                arguments=buffer["arguments"],
+            parsed_tool_call = ToolCall(
+                call_id=buffer["call_id"] or "",
+                tool_name=buffer["name"] or "",
+                arguments=buffer["arguments"] or "",
             )
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
                     event_type=ChatCompletionResponseEventType.progress,
                     delta=ToolCallDelta(
-                        tool_call=tool_call,
+                        tool_call=parsed_tool_call,  # type: ignore[arg-type]  # ToolCallDelta.tool_call accepts Union[str, ToolCall]
                         parse_status=ToolCallParseStatus.succeeded,
                     ),
                     stop_reason=stop_reason,
@@ -1189,7 +1199,7 @@ async def convert_openai_chat_completion_stream(
                 event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.progress, event_type=ChatCompletionResponseEventType.progress,
delta=ToolCallDelta( delta=ToolCallDelta(
tool_call=buffer["content"], tool_call=buffer["content"], # type: ignore[arg-type] # ToolCallDelta.tool_call accepts Union[str, ToolCall]
parse_status=ToolCallParseStatus.failed, parse_status=ToolCallParseStatus.failed,
), ),
stop_reason=stop_reason, stop_reason=stop_reason,
@ -1250,7 +1260,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
top_p: float | None = None, top_p: float | None = None,
user: str | None = None, user: str | None = None,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
messages = openai_messages_to_messages(messages) messages = openai_messages_to_messages(messages) # type: ignore[assignment] # converted from OpenAI to LlamaStack message format
response_format = _convert_openai_request_response_format(response_format) response_format = _convert_openai_request_response_format(response_format)
sampling_params = _convert_openai_sampling_params( sampling_params = _convert_openai_sampling_params(
max_tokens=max_tokens, max_tokens=max_tokens,
@ -1259,15 +1269,15 @@ class OpenAIChatCompletionToLlamaStackMixin:
) )
tool_config = _convert_openai_request_tool_config(tool_choice) tool_config = _convert_openai_request_tool_config(tool_choice)
tools = _convert_openai_request_tools(tools) tools = _convert_openai_request_tools(tools) # type: ignore[assignment] # converted from OpenAI to LlamaStack tool format
if tool_config.tool_choice == ToolChoice.none: if tool_config.tool_choice == ToolChoice.none:
tools = [] tools = [] # type: ignore[assignment] # empty list narrows return type but mypy tracks broader type
outstanding_responses = [] outstanding_responses = []
# "n" is the number of completions to generate per prompt # "n" is the number of completions to generate per prompt
n = n or 1 n = n or 1
for _i in range(0, n): for _i in range(0, n):
response = self.chat_completion( response = self.chat_completion( # type: ignore[attr-defined] # mixin expects class to implement chat_completion
model_id=model, model_id=model,
messages=messages, messages=messages,
sampling_params=sampling_params, sampling_params=sampling_params,
@ -1279,7 +1289,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
outstanding_responses.append(response) outstanding_responses.append(response)
if stream: if stream:
return OpenAIChatCompletionToLlamaStackMixin._process_stream_response(self, model, outstanding_responses) return OpenAIChatCompletionToLlamaStackMixin._process_stream_response(self, model, outstanding_responses) # type: ignore[no-any-return] # mixin async generator return type too complex for mypy
return await OpenAIChatCompletionToLlamaStackMixin._process_non_stream_response( return await OpenAIChatCompletionToLlamaStackMixin._process_non_stream_response(
self, model, outstanding_responses self, model, outstanding_responses
@ -1295,14 +1305,16 @@ class OpenAIChatCompletionToLlamaStackMixin:
response = await outstanding_response response = await outstanding_response
async for chunk in response: async for chunk in response:
event = chunk.event event = chunk.event
finish_reason = _convert_stop_reason_to_openai_finish_reason(event.stop_reason) finish_reason = (
_convert_stop_reason_to_openai_finish_reason(event.stop_reason) if event.stop_reason else None
)
if isinstance(event.delta, TextDelta): if isinstance(event.delta, TextDelta):
text_delta = event.delta.text text_delta = event.delta.text
delta = OpenAIChoiceDelta(content=text_delta) delta = OpenAIChoiceDelta(content=text_delta)
yield OpenAIChatCompletionChunk( yield OpenAIChatCompletionChunk(
id=id, id=id,
choices=[OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)], choices=[OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta)], # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union
created=int(time.time()), created=int(time.time()),
model=model, model=model,
object="chat.completion.chunk", object="chat.completion.chunk",
@ -1310,13 +1322,17 @@ class OpenAIChatCompletionToLlamaStackMixin:
elif isinstance(event.delta, ToolCallDelta): elif isinstance(event.delta, ToolCallDelta):
if event.delta.parse_status == ToolCallParseStatus.succeeded: if event.delta.parse_status == ToolCallParseStatus.succeeded:
tool_call = event.delta.tool_call tool_call = event.delta.tool_call
if isinstance(tool_call, str):
continue
# First chunk includes full structure # First chunk includes full structure
openai_tool_call = OpenAIChoiceDeltaToolCall( openai_tool_call = OpenAIChoiceDeltaToolCall(
index=0, index=0,
id=tool_call.call_id, id=tool_call.call_id,
function=OpenAIChoiceDeltaToolCallFunction( function=OpenAIChoiceDeltaToolCallFunction(
name=tool_call.tool_name, name=tool_call.tool_name
if isinstance(tool_call.tool_name, str)
else tool_call.tool_name.value, # type: ignore[arg-type] # enum .value extraction on Union confuses mypy
arguments="", arguments="",
), ),
) )
@ -1324,7 +1340,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
yield OpenAIChatCompletionChunk( yield OpenAIChatCompletionChunk(
id=id, id=id,
choices=[ choices=[
OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union
], ],
created=int(time.time()), created=int(time.time()),
model=model, model=model,
@ -1341,7 +1357,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
yield OpenAIChatCompletionChunk( yield OpenAIChatCompletionChunk(
id=id, id=id,
choices=[ choices=[
OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) # type: ignore[arg-type] # finish_reason Optional[str] incompatible with Literal union
], ],
created=int(time.time()), created=int(time.time()),
model=model, model=model,
@ -1351,7 +1367,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
async def _process_non_stream_response( async def _process_non_stream_response(
self, model: str, outstanding_responses: list[Awaitable[ChatCompletionResponse]] self, model: str, outstanding_responses: list[Awaitable[ChatCompletionResponse]]
) -> OpenAIChatCompletion: ) -> OpenAIChatCompletion:
choices = [] choices: list[OpenAIChatCompletionChoice] = []
for outstanding_response in outstanding_responses: for outstanding_response in outstanding_responses:
response = await outstanding_response response = await outstanding_response
completion_message = response.completion_message completion_message = response.completion_message
@ -1360,14 +1376,14 @@ class OpenAIChatCompletionToLlamaStackMixin:
choice = OpenAIChatCompletionChoice( choice = OpenAIChatCompletionChoice(
index=len(choices), index=len(choices),
message=message, message=message, # type: ignore[arg-type] # OpenAIChatCompletionMessage union incompatible with narrower Message type
finish_reason=finish_reason, finish_reason=finish_reason,
) )
choices.append(choice) choices.append(choice) # type: ignore[arg-type] # OpenAIChatCompletionChoice type annotation mismatch
return OpenAIChatCompletion( return OpenAIChatCompletion(
id=f"chatcmpl-{uuid.uuid4()}", id=f"chatcmpl-{uuid.uuid4()}",
choices=choices, choices=choices, # type: ignore[arg-type] # list[OpenAIChatCompletionChoice] union incompatible
created=int(time.time()), created=int(time.time()),
model=model, model=model,
object="chat.completion", object="chat.completion",
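For illustration, a minimal, self-contained sketch of the None-safe buffering pattern these guards introduce; the `deltas` shape below is a hypothetical stand-in for streamed OpenAI tool-call chunks, not the repository's actual types.

from typing import TypedDict


class ToolCallBuffer(TypedDict):
    call_id: str | None
    name: str | None
    arguments: str | None
    content: str | None


def accumulate(deltas: list[dict[str, str | None]]) -> ToolCallBuffer:
    # Fields start as None or empty strings and may only be extended after an
    # explicit None check, which is exactly what mypy enforces in the diff above.
    buffer: ToolCallBuffer = {"call_id": None, "name": None, "arguments": "", "content": ""}
    for d in deltas:
        if d.get("id"):
            buffer["call_id"] = d["id"]
        name = d.get("name")
        if name:
            buffer["name"] = name
            if buffer["content"] is not None:
                buffer["content"] += f"{name}("
        args = d.get("arguments")
        if args:
            if buffer["arguments"] is not None:
                buffer["arguments"] += args
            if buffer["content"] is not None:
                buffer["content"] += args
    if buffer["name"] and buffer["content"] is not None:
        buffer["content"] += ")"
    return buffer


print(accumulate([{"id": "call_1", "name": "get_weather"}, {"arguments": '{"city": "Paris"}'}]))
# content accumulates to 'get_weather({"city": "Paris"})'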

View file

@ -196,6 +196,7 @@ def make_overlapped_chunks(
chunks.append( chunks.append(
Chunk( Chunk(
content=chunk, content=chunk,
chunk_id=chunk_id,
metadata=chunk_metadata, metadata=chunk_metadata,
chunk_metadata=backend_chunk_metadata, chunk_metadata=backend_chunk_metadata,
) )
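A hedged sketch of the constructor shape this change produces: the chunk_id is now passed explicitly at construction time. The dataclass below is an illustrative stand-in for the real Chunk model, and the chunk_id value is made up.

from dataclasses import dataclass, field


@dataclass
class Chunk:  # illustrative stand-in, not llama_stack's Chunk
    content: str
    chunk_id: str  # now supplied explicitly instead of derived later
    metadata: dict = field(default_factory=dict)
    chunk_metadata: dict | None = None


c = Chunk(content="Llama 4 Maverick has 128 experts", chunk_id="file-861837565219:0")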

View file

@@ -430,6 +430,32 @@ def _unwrap_generic_list(typ: type[list[T]]) -> type[T]:
    return list_type  # type: ignore[no-any-return]


+def is_generic_sequence(typ: object) -> bool:
+    "True if the specified type is a generic Sequence, i.e. `Sequence[T]`."
+    import collections.abc
+
+    typ = unwrap_annotated_type(typ)
+    return typing.get_origin(typ) is collections.abc.Sequence
+
+
+def unwrap_generic_sequence(typ: object) -> type:
+    """
+    Extracts the item type of a Sequence type.
+
+    :param typ: The Sequence type `Sequence[T]`.
+    :returns: The item type `T`.
+    """
+
+    return rewrap_annotated_type(_unwrap_generic_sequence, typ)  # type: ignore[arg-type]
+
+
+def _unwrap_generic_sequence(typ: object) -> type:
+    "Extracts the item type of a Sequence type (e.g. returns `T` for `Sequence[T]`)."
+    (sequence_type,) = typing.get_args(typ)  # unpack single tuple element
+    return sequence_type  # type: ignore[no-any-return]
+
+
def is_generic_set(typ: object) -> TypeGuard[type[set]]:
    "True if the specified type is a generic set, i.e. `Set[T]`."

View file

@@ -18,10 +18,12 @@ from .inspection import (
    TypeLike,
    is_generic_dict,
    is_generic_list,
+    is_generic_sequence,
    is_type_optional,
    is_type_union,
    unwrap_generic_dict,
    unwrap_generic_list,
+    unwrap_generic_sequence,
    unwrap_optional_type,
    unwrap_union_types,
)
@@ -155,24 +157,28 @@ def python_type_to_name(data_type: TypeLike, force: bool = False) -> str:
    if metadata is not None:
        # type is Annotated[T, ...]
        arg = typing.get_args(data_type)[0]
-        return python_type_to_name(arg)
+        return python_type_to_name(arg, force=force)

    if force:
        # generic types
        if is_type_optional(data_type, strict=True):
-            inner_name = python_type_to_name(unwrap_optional_type(data_type))
+            inner_name = python_type_to_name(unwrap_optional_type(data_type), force=True)
            return f"Optional__{inner_name}"
        elif is_generic_list(data_type):
-            item_name = python_type_to_name(unwrap_generic_list(data_type))
+            item_name = python_type_to_name(unwrap_generic_list(data_type), force=True)
+            return f"List__{item_name}"
+        elif is_generic_sequence(data_type):
+            # Treat Sequence the same as List for schema generation purposes
+            item_name = python_type_to_name(unwrap_generic_sequence(data_type), force=True)
            return f"List__{item_name}"
        elif is_generic_dict(data_type):
            key_type, value_type = unwrap_generic_dict(data_type)
-            key_name = python_type_to_name(key_type)
-            value_name = python_type_to_name(value_type)
+            key_name = python_type_to_name(key_type, force=True)
+            value_name = python_type_to_name(value_type, force=True)
            return f"Dict__{key_name}__{value_name}"
        elif is_type_union(data_type):
            member_types = unwrap_union_types(data_type)
-            member_names = "__".join(python_type_to_name(member_type) for member_type in member_types)
+            member_names = "__".join(python_type_to_name(member_type, force=True) for member_type in member_types)
            return f"Union__{member_names}"

    # named system or user-defined type
View file

@ -111,7 +111,7 @@ def get_class_property_docstrings(
def docstring_to_schema(data_type: type) -> Schema: def docstring_to_schema(data_type: type) -> Schema:
short_description, long_description = get_class_docstrings(data_type) short_description, long_description = get_class_docstrings(data_type)
schema: Schema = { schema: Schema = {
"title": python_type_to_name(data_type), "title": python_type_to_name(data_type, force=True),
} }
description = "\n".join(filter(None, [short_description, long_description])) description = "\n".join(filter(None, [short_description, long_description]))
@ -417,6 +417,10 @@ class JsonSchemaGenerator:
if origin_type is list: if origin_type is list:
(list_type,) = typing.get_args(typ) # unpack single tuple element (list_type,) = typing.get_args(typ) # unpack single tuple element
return {"type": "array", "items": self.type_to_schema(list_type)} return {"type": "array", "items": self.type_to_schema(list_type)}
elif origin_type is collections.abc.Sequence:
# Treat Sequence the same as list for JSON schema (both are arrays)
(sequence_type,) = typing.get_args(typ) # unpack single tuple element
return {"type": "array", "items": self.type_to_schema(sequence_type)}
elif origin_type is dict: elif origin_type is dict:
key_type, value_type = typing.get_args(typ) key_type, value_type = typing.get_args(typ)
if not (key_type is str or key_type is int or is_type_enum(key_type)): if not (key_type is str or key_type is int or is_type_enum(key_type)):
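A standalone re-derivation of the array schema the new branch emits, assuming only str/int item types for brevity; JsonSchemaGenerator itself is not invoked.

import collections.abc
import typing
from collections.abc import Sequence


def sequence_schema(typ: object) -> dict:
    assert typing.get_origin(typ) is collections.abc.Sequence
    (item,) = typing.get_args(typ)
    # Sequence and list both map to a JSON array schema.
    return {"type": "array", "items": {"type": {str: "string", int: "integer"}[item]}}


assert sequence_schema(Sequence[str]) == {"type": "array", "items": {"type": "string"}}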

View file

@@ -39,7 +39,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
    if provider.provider_type in (
        "inline::meta-reference",
        "inline::sentence-transformers",
-        "inline::vllm",
+        "remote::vllm",
        "remote::bedrock",
        "remote::databricks",
        # Technically Nvidia does support OpenAI completions, but none of their hosted models
@@ -120,7 +120,7 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
    if provider.provider_type in (
        "inline::meta-reference",
        "inline::sentence-transformers",
-        "inline::vllm",
+        "remote::vllm",
        "remote::bedrock",
        "remote::databricks",
        "remote::cerebras",

View file

@@ -0,0 +1,763 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_UKFNZA0eSkL6fZHbs8ygBd5W",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_UKFNZA0eSkL6fZHbs8ygBd5W",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-861837565219, score: 0.015252742239920682, attributes: {'filename': 'test_response_non_streaming_file_search.txt', 'chunk_id': '869ae0c0-ab85-ca6f-e5d0-024381443c27', 'document_id': 'file-861837565219', 'token_count': 10.0, 'metadata_token_count': 13.0} (cite as <|file-861837565219|>)\nLlama 4 Maverick has 128 experts\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "OEZj77MujzEilF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "aZ37vwWHFrpGy"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "csghpwq82thpEG"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "1dRxATyjFkzZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "DkAEGxNVXrhL9KJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "SI7v0ofTi6JL0LP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "tThgm0YItJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5UnIV9ZM2koPE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "pFPs5HfBSA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CIT42IHpAEgx"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "jpXixTaXlYSxTu3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "IBEKia6bwNtLB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "hHMPPr4Q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "iGTIWlxj9c2Equ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "wfQImUZLNC8Dtgc"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "m21wFuqSLpMN"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CP5N1QxHqEnzbnq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "861",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "jgQZ9egEpAiQv"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "837",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "viNedPoe13lJJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "565",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "j2gGBSzOagN98"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "219",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "d4iMNITon2xM3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "67lYY4LnZsfKd3U"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "bMllpJPicr01Ip"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "ZgWEFMbo3w"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-05434d44cd8a",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 352,
"total_tokens": 375,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "Wwt10anxWJDla"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
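These recordings can be replayed by rebuilding each chunk from its __data__ payload and concatenating the streamed deltas. A hedged sketch follows: the file name is hypothetical, and extra keys such as "obfuscation" are tolerated because the OpenAI SDK models allow extra fields.

import json

from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

with open("recording.json") as f:  # hypothetical path to a fixture like the one above
    recording = json.load(f)

text = ""
for entry in recording["response"]["body"]:
    chunk = ChatCompletionChunk.model_validate(entry["__data__"])
    if chunk.choices and chunk.choices[0].delta.content:
        text += chunk.choices[0].delta.content

print(text)  # "The Llama 4 Maverick model has 128 experts <|file-861837565219|>."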

View file

@@ -0,0 +1,767 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts_pdf]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_M8gyYiB39MwYdJKc4aHIGbfA",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_M8gyYiB39MwYdJKc4aHIGbfA",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 2 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-379221123213, score: 0.008294223715346738, attributes: {'filename': 'llama_stack_and_models.pdf', 'chunk_id': 'c3556aea-3b73-0278-aa16-ebbdb4c18b18', 'document_id': 'file-379221123213', 'token_count': 98.0, 'metadata_token_count': 11.0} (cite as <|file-379221123213|>)\n, \nhardware\n \nvendors,\n \nand\n \nAI-focused\n \ncompanies)\n \nthat\n \noffer\n \ntailored\n \ninfrastructure,\n \nsoftware,\n \nand\n \nservices\n \nfor\n \ndeploying\n \nLlama\n \nmodels.\n \nLlama 4 Maverick \n Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts. \n"
},
{
"type": "text",
"text": "[2] document_id: file-379221123213, score: 0.0033899213359898477, attributes: {'filename': 'llama_stack_and_models.pdf', 'chunk_id': '16d99c69-8323-27ce-3bd7-7b51dcac2735', 'document_id': 'file-379221123213', 'token_count': 498.0, 'metadata_token_count': 11.0} (cite as <|file-379221123213|>)\nLlama Stack \nLlama Stack Overview \nLlama Stack standardizes the core building blocks that simplify AI application development. It codifies best \npractices\n \nacross\n \nthe\n \nLlama\n \necosystem.\n \nMore\n \nspecifically,\n \nit\n \nprovides\n \u25cf Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry. \u25cf Plugin architecture to support the rich ecosystem of different API implementations in various \nenvironments,\n \nincluding\n \nlocal\n \ndevelopment,\n \non-premises,\n \ncloud,\n \nand\n \nmobile.\n \u25cf Prepackaged verified distributions which offer a one-stop solution for developers to get started quickly \nand\n \nreliably\n \nin\n \nany\n \nenvironment.\n \u25cf Multiple developer interfaces like CLI and SDKs for Python, Typescript, iOS, and Android. \u25cf Standalone applications as examples for how to build production-grade AI applications with Llama \nStack.\n \nLlama Stack Benefits \n\u25cf Flexible Options: Developers can choose their preferred infrastructure without changing APIs and enjoy \nflexible\n \ndeployment\n \nchoices.\n \u25cf Consistent Experience: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI \napplications\n \nwith\n \nconsistent\n \napplication\n \nbehavior.\n \u25cf Robust Ecosystem: Llama Stack is already integrated with distribution partners (cloud providers, \nhardware\n \nvendors,\n \nand\n \nAI-focused\n \ncompanies)\n \nthat\n \noffer\n \ntailored\n \ninfrastructure,\n \nsoftware,\n \nand\n \nservices\n \nfor\n \ndeploying\n \nLlama\n \nmodels.\n \nLlama 4 Maverick \n Llama 4 Maverick is a Mixture-of-Experts (MoE) model with 17 billion active parameters and 128 experts. \n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "SH6nRcfXzd8qPg"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vbJu1mhpQKtNr"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "uAUiYAVpMW8Ph9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "DJxjs1HFugOD"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "sU2IncrauGmuYki"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "IkZbrWS45cqkmqi"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "YbZYhGgoGE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "7FtHnapGtkc09"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "8P3mUr7HfV"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "WxYXJUfkyxqZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "E9hIXNC7oeJcZ8v"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "L9ww7cI1pSSt3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "hHao5x7a"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "0cwygEJttBgv7M"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "KYVCnE5AA6MnQ0Y"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "N3DcYBcrQDzD"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CUpjI7Qo17k4aeo"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "379",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "s1694CAHwowUf"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "221",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "I94vCKkpQNsx6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "123",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "RNfAfPtJK3KHE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "213",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Gk04vo9RXpl3P"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "rkWPIUdNABAeP7V"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "GIF1vPXxInWrhl"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Oa1imYdRme"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-40985d2e0ff8",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 1048,
"total_tokens": 1071,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "0Xx3txQF13S"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,925 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_FzhOmTdZThRndI5rSASPdAqr",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_FzhOmTdZThRndI5rSASPdAqr",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-797509666839, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-797509666839', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-797509666839|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Lk9Xf7hCFPS2tT"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "14pQ6XFvX7eSh"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "gPEg73EpAxR3FC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "ZWJl6Mzcv95d"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "zEYaSNtwtGmhfwy"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "2tesGAvAkEOb8T6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Hykn5kSQlG"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "xWW13SGjSybVX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fAZjisJ63a"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "FlTpZNfFG6rX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "9J9VrtXuLHug6II"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "0EckZGr823mA9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "dW7O5HFR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5dRdaDvaXumkV"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " its",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "kD1aZsGwZhMx"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " mixture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "IpxDJF0p"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "WbnOG310xKaLq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "sh58U2d8"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " architecture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "El3"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "u3EtYZFJGaheZj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "QjdqqIuk8c7wMUp"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Zqcwf53n0hUw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "DfFLPM5V45QUiAm"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "797",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "55snCUEJgoLyX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "509",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "pCqEKhy1wq8Vl"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "666",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "c5QnCsKzuhFd0"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "839",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "jFSbryUeH7ZyA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "uHktQBYsC92laeK"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "UUxHP1QGdz8MdR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "uExxZzWuXd"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-454a64d08460",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 29,
"prompt_tokens": 359,
"total_tokens": 388,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "EjpA6XzHVgcj8"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
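
The recording above ends with the canonical streaming shape: per-token content deltas, a chunk whose finish_reason is "stop", and, because the request sets stream_options.include_usage, a final chunk with an empty choices list that carries the usage totals. Below is a minimal sketch of folding such a stream back into a message, assuming the OpenAI Python client's ChatCompletionChunk type; the helper name and its caller are hypothetical and not part of these fixtures.

from collections.abc import Iterable

from openai.types.chat import ChatCompletionChunk


def accumulate_stream(chunks: Iterable[ChatCompletionChunk]):
    """Collect delta content, the finish reason, and the trailing usage chunk."""
    content_parts: list[str] = []
    finish_reason = None
    usage = None
    for chunk in chunks:
        # With stream_options.include_usage=True, the last chunk has an empty
        # choices list and a non-null usage field, as in the recording above.
        if chunk.usage is not None:
            usage = chunk.usage
        for choice in chunk.choices:
            if choice.delta.content:
                content_parts.append(choice.delta.content)
            if choice.finish_reason:
                finish_reason = choice.finish_reason
    return "".join(content_parts), finish_reason, usage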

View file

@ -0,0 +1,631 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_FzhOmTdZThRndI5rSASPdAqr",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_FzhOmTdZThRndI5rSASPdAqr",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-797509666839, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-797509666839', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-797509666839|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
},
{
"role": "assistant",
"content": "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture <|file-797509666839|>."
},
{
"role": "user",
"content": "Can you tell me more about the architecture?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_y4Py1L2VscRQ5IBZ7gGpqpWv",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "iFdF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "gIC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "P"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "p"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "TAVud"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "hHmE5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CN4uS"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "0kI"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " architecture",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "dyryTBF49"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "BHV"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "qrKh"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-4d749d8c25ad",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 22,
"prompt_tokens": 404,
"total_tokens": 426,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "ecpBTD3qjc75r"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}
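
This recording streams a tool call rather than content: the opening delta carries the call id and function name, subsequent deltas append fragments of the JSON arguments ("{\"", "query", "\":\"", ...), and the stream closes with finish_reason "tool_calls" before the usage-only chunk. Below is a minimal sketch of reassembling those fragments, again assuming the OpenAI Python client's chunk types; the helper is hypothetical, not part of the fixtures.

import json


def accumulate_tool_call(chunks):
    """Merge streamed tool_call deltas into (call_id, name, parsed arguments)."""
    call_id, name, argument_parts = None, None, []
    for chunk in chunks:
        for choice in chunk.choices:
            # Only the first delta carries id/name; later ones carry fragments.
            for tool_call in choice.delta.tool_calls or []:
                call_id = tool_call.id or call_id
                if tool_call.function:
                    name = tool_call.function.name or name
                    if tool_call.function.arguments:
                        argument_parts.append(tool_call.function.arguments)
    # The concatenated fragments form a complete JSON object, e.g.
    # {"query":"Llama 4 Maverick model architecture"}.
    return call_id, name, json.loads("".join(argument_parts))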

View file

@ -0,0 +1,763 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_non_streaming_file_search[client_with_models-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-llama_experts]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_gZXRKN1HMDC16NP9wNPAkP9K",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model experts count\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_gZXRKN1HMDC16NP9wNPAkP9K",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-864460993305, score: 0.011418752464355166, attributes: {'filename': 'test_response_non_streaming_file_search.txt', 'chunk_id': '869ae0c0-ab85-ca6f-e5d0-024381443c27', 'document_id': 'file-864460993305', 'token_count': 10.0, 'metadata_token_count': 13.0} (cite as <|file-864460993305|>)\nLlama 4 Maverick has 128 experts\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model experts count\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "VvS2zeV5Z8apdX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "NeElmbFuPxg9F"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "RA2Dv6fH3Xp28d"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "mk2wpBSl9esL"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "WkghQrNy7WNFz7S"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "LOo1ya1Av8yejuX"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Uj02OVTEBb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "7s3FiwwwgzGhy"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "WExrPT6Yjd"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vbf0YwoBbJsB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vYIgV2n0AuxwZ9F"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "lAS4gXrK4sNoq"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "90lGUcaB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "mnFZfKgXWsjWZe"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "eOcwjhvK0vIp2nj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5TijFZHKoeGs"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "MWGjx7wiu4tdFha"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "864",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "k9VH32AhyY519"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "460",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "dWxZtp4i8KhxZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "993",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "u2WHjDkGJE2hg"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "305",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "6fckZytfB9iS5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "YGOP75uha3KyHao"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "emmym2mGHhvw9Q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "GoEMFfNFBW"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-baa0ba98b7f3",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 23,
"prompt_tokens": 350,
"total_tokens": 373,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "ec6S325i8izl1"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,631 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-528246887823, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-528246887823', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-528246887823|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
},
{
"role": "assistant",
"content": "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture <|file-528246887823|>."
},
{
"role": "user",
"content": "Can you tell me more about the architecture?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_2dn6pQIic4tAhxL0Q3R9v9oy",
"function": {
"arguments": "",
"name": "knowledge_search"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "U5u2"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "rC6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "query",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "4"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\":\"",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "E"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "L",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "U1RKZ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "lama",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "N9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " ",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "eCM84"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "4",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "RNtZo"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " Maver",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ick",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "OmQ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " model",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " architecture",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Hd8hPZl2u"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5bs"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "eMIj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-c0b147807a41",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 22,
"prompt_tokens": 404,
"total_tokens": 426,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "ofat2LchRvz8V"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,925 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_response_sequential_file_search[openai_client-txt=openai/gpt-4o:emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "How many experts does the Llama 4 Maverick model have?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"Llama 4 Maverick model number of experts\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_4ac6gxccWFxDvEl8BizY3BJw",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-528246887823, score: 0.019272299825769716, attributes: {'filename': 'test_sequential_file_search.txt', 'chunk_id': '3907d885-d8e7-a72d-1113-f7080454d97c', 'document_id': 'file-528246887823', 'token_count': 19.0, 'metadata_token_count': 11.0} (cite as <|file-528246887823|>)\nThe Llama 4 Maverick model has 128 experts in its mixture of experts architecture.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"Llama 4 Maverick model number of experts\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "DzrEfuLOuw4cnb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CsVsWYnTMLfCu"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " L",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "45hLla9Dhdu3x9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "lama",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "AhCUnf7tqKqC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "gvAEwnHAgMzITVb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "4",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "mGUFWICkd1S0jlx"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " Maver",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "e85JCyNVPe"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "ick",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "5vQf0h4IJTGGt"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " model",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "anovsNqaSC"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " has",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fS6GYg8pBO8Q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vO7onsnvWf5kjUI"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "128",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "pdFjXciA0pN5w"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "eMMaKcAW"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "JFDRUy7B9ktO0"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " its",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "QlQIiohVPMVQ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " mixture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "UuR2QmMR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "t0uvHdtkB4Fsl"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " experts",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "3G1KX2gw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " architecture",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "x2J"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fbLYZDlS7xvywf"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "vAxoGpf245DPeM8"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "gLu1ZShAlH4C"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "PdMvc8X2LtbhyFU"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "528",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "0S00nwBZD0Cah"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "246",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fa7s8AYzHjMph"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "887",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "hrwMBgH8bsKYT"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "823",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "NBJ8yJWJjBCCQ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "AAzbONdy9ExzSBR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "THiCsk4cqjABWJ"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "rzm64SnHTE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-cf185c868634",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 29,
"prompt_tokens": 359,
"total_tokens": 388,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "AnUv1BxAB2uOY"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -0,0 +1,952 @@
{
"test_id": "tests/integration/responses/test_file_search.py::test_response_file_search_filter_compound_and[client_with_models-txt=openai/gpt-4o]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What are the engineering updates from the US?"
},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"index": 0,
"id": "call_rST37XuKuJQcEBfmoTnNQzNe",
"type": "function",
"function": {
"name": "knowledge_search",
"arguments": "{\"query\":\"engineering updates from the US\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_rST37XuKuJQcEBfmoTnNQzNe",
"content": [
{
"type": "text",
"text": "knowledge_search tool found 1 chunks:\nBEGIN of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "[1] document_id: file-710505118847, score: 0.005345607610573921, attributes: {'region': 'us', 'category': 'engineering', 'date': 1680307200.0, 'filename': 'us_engineering_q2.txt', 'chunk_id': '084e15ad-480a-eae8-9242-391c53854867', 'document_id': 'file-710505118847', 'token_count': 18.0, 'metadata_token_count': 32.0} (cite as <|file-710505118847|>)\nUS technical updates for Q2 2023. New features deployed in the US region.\n"
},
{
"type": "text",
"text": "END of knowledge_search tool results.\n"
},
{
"type": "text",
"text": "The above results were retrieved to help answer the user's query: \"engineering updates from the US\". Use them as supporting information only in answering this query. Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). Do not add extra punctuation. Use only the file IDs provided (do not invent new ones).\n"
}
]
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "knowledge_search",
"description": "Search for information in a database.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The query to search for. Can be a natural language sentence or keywords."
}
},
"required": [
"query"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "CVT4TMzBPNlTqA"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "The",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Rlj8tcP3E7bOB"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " engineering",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "8lga"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " updates",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "6fwO0WkR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " from",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "BryajibrQvv"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "iTlMgikEguMP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " US",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "79xbcCa6na7en"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " include",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "q7q4AkjT"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " new",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "fiyvaDyv5eet"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " features",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "cBkhZfR"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " deployed",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "EaW5Ixt"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "xLVfGMTiR4OMS"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "cncqZQApoIjH"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " region",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "yiSqVtnqF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "sbDWGbV8OoYi"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " Q",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "E1ZJCGd5c2IH7b"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "2",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "agHXieAbH98A2VE"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " ",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Ht3DkQwQs7t32Aw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "202",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "j4r88Vvqcm7VY"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "3",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "pv9GLKOSpa0BHEr"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": " <",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "iBXT8JWz9X1J1q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "D1gi2w0f0DN5n3k"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "file",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "zxHM3I5wmPGU"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "-",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "Gl7oL62eU6xIrUp"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "710",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "l4RX4sx1BfQA6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "505",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "AGyEWqU2sDL6e"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "118",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "BReQxn8kTEiA5"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "847",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "yN9PEtunpAkNv"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": "|",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "bKBLmRBkxlk61fP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": ">.",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "077BDwQit7hWfz"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": null,
"obfuscation": "LOYztD3Yfb"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-d6f74a7dd25a",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_a788c5aef0",
"usage": {
"completion_tokens": 30,
"prompt_tokens": 364,
"total_tokens": 394,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "9lHtlsx9YsVH6"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@ -82,23 +82,37 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode
 @pytest.fixture(scope="session")
 def sample_chunks():
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
+    chunks_data = [
+        (
+            "Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
+            "doc1",
+            "programming",
+        ),
+        (
+            "Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
+            "doc2",
+            "ai",
+        ),
+        (
+            "Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
+            "doc3",
+            "computer_science",
+        ),
+        (
+            "Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
+            "doc4",
+            "ai",
+        ),
+    ]
+
     return [
         Chunk(
-            content="Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
-            metadata={"document_id": "doc1", "topic": "programming"},
-        ),
-        Chunk(
-            content="Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
-            metadata={"document_id": "doc2", "topic": "ai"},
-        ),
-        Chunk(
-            content="Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
-            metadata={"document_id": "doc3", "topic": "computer_science"},
-        ),
-        Chunk(
-            content="Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
-            metadata={"document_id": "doc4", "topic": "ai"},
-        ),
+            content=content,
+            chunk_id=generate_chunk_id(doc_id, content),
+            metadata={"document_id": doc_id, "topic": topic},
+        )
+        for content, doc_id, topic in chunks_data
     ]

View file

@ -13,23 +13,33 @@ from ..conftest import vector_provider_wrapper
 @pytest.fixture(scope="session")
 def sample_chunks():
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
+    chunks_data = [
+        (
+            "Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
+            "doc1",
+        ),
+        (
+            "Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
+            "doc2",
+        ),
+        (
+            "Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
+            "doc3",
+        ),
+        (
+            "Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
+            "doc4",
+        ),
+    ]
+
     return [
         Chunk(
-            content="Python is a high-level programming language that emphasizes code readability and allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java.",
-            metadata={"document_id": "doc1"},
-        ),
-        Chunk(
-            content="Machine learning is a subset of artificial intelligence that enables systems to automatically learn and improve from experience without being explicitly programmed, using statistical techniques to give computer systems the ability to progressively improve performance on a specific task.",
-            metadata={"document_id": "doc2"},
-        ),
-        Chunk(
-            content="Data structures are fundamental to computer science because they provide organized ways to store and access data efficiently, enable faster processing of data through optimized algorithms, and form the building blocks for more complex software systems.",
-            metadata={"document_id": "doc3"},
-        ),
-        Chunk(
-            content="Neural networks are inspired by biological neural networks found in animal brains, using interconnected nodes called artificial neurons to process information through weighted connections that can be trained to recognize patterns and solve complex problems through iterative learning.",
-            metadata={"document_id": "doc4"},
-        ),
+            content=content,
+            chunk_id=generate_chunk_id(doc_id, content),
+            metadata={"document_id": doc_id},
+        )
+        for content, doc_id in chunks_data
     ]
@ -168,6 +178,7 @@ def test_insert_chunks_with_precomputed_embeddings(
     chunks_with_embeddings = [
         Chunk(
             content="This is a test chunk with precomputed embedding.",
+            chunk_id="chunk1",
             metadata={"document_id": "doc1", "source": "precomputed", "chunk_id": "chunk1"},
             embedding=[0.1] * int(embedding_dimension),
         ),
@ -215,9 +226,12 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
     actual_vector_store_id = register_response.id
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     chunks_with_embeddings = [
         Chunk(
             content="duplicate",
+            chunk_id=generate_chunk_id("doc1", "duplicate"),
             metadata={"document_id": "doc1", "source": "precomputed"},
             embedding=[0.1] * int(embedding_dimension),
         ),

View file

@ -192,18 +192,18 @@ async def test_create_agent_session_persistence(agents_impl, sample_agent_config
     assert session_response.session_id is not None

     # Verify the session was stored
-    session = await agents_impl.get_agents_session(agent_id, session_response.session_id)
+    session = await agents_impl.get_agents_session(session_response.session_id, agent_id)
     assert session.session_name == "test_session"
     assert session.session_id == session_response.session_id
     assert session.started_at is not None
     assert session.turns == []

     # Delete the session
-    await agents_impl.delete_agents_session(agent_id, session_response.session_id)
+    await agents_impl.delete_agents_session(session_response.session_id, agent_id)

     # Verify the session was deleted
     with pytest.raises(ValueError):
-        await agents_impl.get_agents_session(agent_id, session_response.session_id)
+        await agents_impl.get_agents_session(session_response.session_id, agent_id)


 @pytest.mark.parametrize("enable_session_persistence", [True, False])
@ -226,11 +226,11 @@ async def test_list_agent_sessions_persistence(agents_impl, sample_agent_config,
     assert session2.session_id in session_ids

     # Delete one session
-    await agents_impl.delete_agents_session(agent_id, session1.session_id)
+    await agents_impl.delete_agents_session(session1.session_id, agent_id)

     # Verify the session was deleted
     with pytest.raises(ValueError):
-        await agents_impl.get_agents_session(agent_id, session1.session_id)
+        await agents_impl.get_agents_session(session1.session_id, agent_id)

     # List sessions again
     sessions = await agents_impl.list_agent_sessions(agent_id)

View file

@ -43,9 +43,15 @@ def embedding_dimension() -> int:
 @pytest.fixture(scope="session")
 def sample_chunks():
     """Generates chunks that force multiple batches for a single document to expose ID conflicts."""
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     n, k = 10, 3
     sample = [
-        Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"})
+        Chunk(
+            content=f"Sentence {i} from document {j}",
+            chunk_id=generate_chunk_id(f"document-{j}", f"Sentence {i} from document {j}"),
+            metadata={"document_id": f"document-{j}"},
+        )
         for j in range(k)
         for i in range(n)
     ]
@ -53,6 +59,7 @@ def sample_chunks():
         [
             Chunk(
                 content=f"Sentence {i} from document {j + k}",
+                chunk_id=f"document-{j}-chunk-{i}",
                 chunk_metadata=ChunkMetadata(
                     document_id=f"document-{j + k}",
                     chunk_id=f"document-{j}-chunk-{i}",
@ -73,6 +80,7 @@ def sample_chunks_with_metadata():
     sample = [
         Chunk(
             content=f"Sentence {i} from document {j}",
+            chunk_id=f"document-{j}-chunk-{i}",
             metadata={"document_id": f"document-{j}"},
             chunk_metadata=ChunkMetadata(
                 document_id=f"document-{j}",

View file

@ -49,9 +49,21 @@ def vector_store_id():
 @pytest.fixture
 def sample_chunks():
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     return [
-        Chunk(content="MOCK text content 1", mime_type="text/plain", metadata={"document_id": "mock-doc-1"}),
-        Chunk(content="MOCK text content 1", mime_type="text/plain", metadata={"document_id": "mock-doc-2"}),
+        Chunk(
+            content="MOCK text content 1",
+            chunk_id=generate_chunk_id("mock-doc-1", "MOCK text content 1"),
+            mime_type="text/plain",
+            metadata={"document_id": "mock-doc-1"},
+        ),
+        Chunk(
+            content="MOCK text content 1",
+            chunk_id=generate_chunk_id("mock-doc-2", "MOCK text content 1"),
+            mime_type="text/plain",
+            metadata={"document_id": "mock-doc-2"},
+        ),
     ]

View file

@ -434,9 +434,15 @@ async def test_query_chunks_hybrid_tie_breaking(
     sqlite_vec_index, sample_embeddings, embedding_dimension, tmp_path_factory
 ):
     """Test tie-breaking and determinism when scores are equal."""
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     # Create two chunks with the same content and embedding
-    chunk1 = Chunk(content="identical", metadata={"document_id": "docA"})
-    chunk2 = Chunk(content="identical", metadata={"document_id": "docB"})
+    chunk1 = Chunk(
+        content="identical", chunk_id=generate_chunk_id("docA", "identical"), metadata={"document_id": "docA"}
+    )
+    chunk2 = Chunk(
+        content="identical", chunk_id=generate_chunk_id("docB", "identical"), metadata={"document_id": "docB"}
+    )
     chunks = [chunk1, chunk2]

     # Use the same embedding for both chunks to ensure equal scores
     same_embedding = sample_embeddings[0]

View file

@ -135,10 +135,24 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
     vector_io_adapter.cache["db1"] = fake_index

     # Various document_id scenarios that shouldn't crash
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
     chunks = [
-        Chunk(content="has doc_id in metadata", metadata={"document_id": "doc-1"}),
-        Chunk(content="no doc_id anywhere", metadata={"source": "test"}),
-        Chunk(content="doc_id in chunk_metadata", chunk_metadata=ChunkMetadata(document_id="doc-3")),
+        Chunk(
+            content="has doc_id in metadata",
+            chunk_id=generate_chunk_id("doc-1", "has doc_id in metadata"),
+            metadata={"document_id": "doc-1"},
+        ),
+        Chunk(
+            content="no doc_id anywhere",
+            chunk_id=generate_chunk_id("unknown", "no doc_id anywhere"),
+            metadata={"source": "test"},
+        ),
+        Chunk(
+            content="doc_id in chunk_metadata",
+            chunk_id=generate_chunk_id("doc-3", "doc_id in chunk_metadata"),
+            chunk_metadata=ChunkMetadata(document_id="doc-3"),
+        ),
     ]

     # Should work without KeyError
@ -151,7 +165,9 @@ async def test_document_id_with_invalid_type_raises_error():
     from llama_stack.apis.vector_io import Chunk

     # Integer document_id should raise TypeError
-    chunk = Chunk(content="test", metadata={"document_id": 12345})
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
+    chunk = Chunk(content="test", chunk_id=generate_chunk_id("test", "test"), metadata={"document_id": 12345})
     with pytest.raises(TypeError) as exc_info:
         _ = chunk.document_id
     assert "metadata['document_id'] must be a string" in str(exc_info.value)
@ -159,7 +175,9 @@ async def test_document_id_with_invalid_type_raises_error():
 async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter):
-    expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1])
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
+    expected = QueryChunksResponse(chunks=[Chunk(content="c1", chunk_id=generate_chunk_id("test", "c1"))], scores=[0.1])
     fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected))
     vector_io_adapter.cache["db1"] = fake_index
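Note: the TypeError test above encodes a validation contract that is easy to miss: Chunk construction does not validate metadata["document_id"]; the check happens lazily when the document_id property is read. A minimal sketch of that behavior (values are illustrative):

    from llama_stack.apis.vector_io import Chunk
    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id

    # Construction succeeds even with a non-string document_id in metadata...
    chunk = Chunk(content="test", chunk_id=generate_chunk_id("test", "test"), metadata={"document_id": 12345})

    # ...but reading the property raises, per the assertion in the test above.
    try:
        _ = chunk.document_id
    except TypeError as err:
        assert "metadata['document_id'] must be a string" in str(err)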

View file

@ -18,13 +18,12 @@ from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 def test_generate_chunk_id():
-    chunks = [
-        Chunk(content="test", metadata={"document_id": "doc-1"}),
-        Chunk(content="test ", metadata={"document_id": "doc-1"}),
-        Chunk(content="test 3", metadata={"document_id": "doc-1"}),
-    ]
-    chunk_ids = sorted([chunk.chunk_id for chunk in chunks])
+    """Test that generate_chunk_id produces expected hashes."""
+    chunk_id1 = generate_chunk_id("doc-1", "test")
+    chunk_id2 = generate_chunk_id("doc-1", "test ")
+    chunk_id3 = generate_chunk_id("doc-1", "test 3")
+
+    chunk_ids = sorted([chunk_id1, chunk_id2, chunk_id3])
     assert chunk_ids == [
         "31d1f9a3-c8d2-66e7-3c37-af2acd329778",
         "d07dade7-29c0-cda7-df29-0249a1dcbc3e",
@ -33,42 +32,49 @@ def test_generate_chunk_id():
 def test_generate_chunk_id_with_window():
-    chunk = Chunk(content="test", metadata={"document_id": "doc-1"})
+    """Test that generate_chunk_id with chunk_window produces different IDs."""
+    # Create a chunk object to match the original test behavior (passing object to generate_chunk_id)
+    chunk = Chunk(content="test", chunk_id="placeholder", metadata={"document_id": "doc-1"})
     chunk_id1 = generate_chunk_id("doc-1", chunk, chunk_window="0-1")
     chunk_id2 = generate_chunk_id("doc-1", chunk, chunk_window="1-2")
-    assert chunk_id1 == "8630321a-d9cb-2bb6-cd28-ebf68dafd866"
-    assert chunk_id2 == "13a1c09a-cbda-b61a-2d1a-7baa90888685"
+    # Verify that different windows produce different IDs
+    assert chunk_id1 != chunk_id2
+    assert len(chunk_id1) == 36  # Valid UUID format
+    assert len(chunk_id2) == 36  # Valid UUID format


-def test_chunk_id():
-    # Test with existing chunk ID
-    chunk_with_id = Chunk(content="test", metadata={"document_id": "existing-id"})
-    assert chunk_with_id.chunk_id == "11704f92-42b6-61df-bf85-6473e7708fbd"
-
-    # Test with document ID in metadata
-    chunk_with_doc_id = Chunk(content="test", metadata={"document_id": "doc-1"})
-    assert chunk_with_doc_id.chunk_id == generate_chunk_id("doc-1", "test")
-
-    # Test chunks with ChunkMetadata
-    chunk_with_metadata = Chunk(
+def test_chunk_creation_with_explicit_id():
+    """Test that chunks can be created with explicit chunk_id."""
+    chunk_id = generate_chunk_id("doc-1", "test")
+    chunk = Chunk(
         content="test",
-        metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"},
+        chunk_id=chunk_id,
+        metadata={"document_id": "doc-1"},
+    )
+    assert chunk.chunk_id == chunk_id
+    assert chunk.chunk_id == "31d1f9a3-c8d2-66e7-3c37-af2acd329778"
+
+
+def test_chunk_with_metadata():
+    """Test chunks with ChunkMetadata."""
+    chunk_id = "chunk-id-1"
+    chunk = Chunk(
+        content="test",
+        chunk_id=chunk_id,
+        metadata={"document_id": "existing-id"},
         chunk_metadata=ChunkMetadata(document_id="document_1"),
     )
-    assert chunk_with_metadata.chunk_id == "chunk-id-1"
-
-    # Test with no ID or document ID
-    chunk_without_id = Chunk(content="test")
-    generated_id = chunk_without_id.chunk_id
-    assert isinstance(generated_id, str) and len(generated_id) == 36  # Should be a valid UUID
+    assert chunk.chunk_id == "chunk-id-1"
+    assert chunk.document_id == "existing-id"  # metadata takes precedence


-def test_stored_chunk_id_alias():
-    # Test with existing chunk ID alias
-    chunk_with_alias = Chunk(content="test", metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"})
-    assert chunk_with_alias.chunk_id == "chunk-id-1"
-    serialized_chunk = chunk_with_alias.model_dump()
-    assert serialized_chunk["stored_chunk_id"] == "chunk-id-1"
-    # showing chunk_id is not serialized (i.e., a computed field)
-    assert "chunk_id" not in serialized_chunk
-    assert chunk_with_alias.stored_chunk_id == "chunk-id-1"
+def test_chunk_serialization():
+    """Test that chunk_id is properly serialized."""
+    chunk = Chunk(
+        content="test",
+        chunk_id="test-chunk-id",
+        metadata={"document_id": "doc-1"},
+    )
+    serialized_chunk = chunk.model_dump()
+    assert serialized_chunk["chunk_id"] == "test-chunk-id"
+    assert "chunk_id" in serialized_chunk

View file

@ -41,6 +41,7 @@ class TestRagQuery:
         interleaved_content = MagicMock()
         chunk = Chunk(
             content=interleaved_content,
+            chunk_id="chunk1",
             metadata={
                 "key1": "value1",
                 "token_count": 10,
@ -48,7 +49,6 @@ class TestRagQuery:
                 # Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert()
                 "document_id": "doc1",
             },
-            stored_chunk_id="chunk1",
             chunk_metadata=chunk_metadata,
         )
@ -101,8 +101,8 @@ class TestRagQuery:
         )
         chunk1 = Chunk(
             content="chunk from db1",
+            chunk_id="c1",
             metadata={"vector_store_id": "db1", "document_id": "doc1"},
-            stored_chunk_id="c1",
             chunk_metadata=chunk_metadata1,
         )
@ -114,8 +114,8 @@ class TestRagQuery:
         )
         chunk2 = Chunk(
             content="chunk from db2",
+            chunk_id="c2",
             metadata={"vector_store_id": "db2", "document_id": "doc2"},
-            stored_chunk_id="c2",
             chunk_metadata=chunk_metadata2,
         )

View file

@ -26,6 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     content_from_doc,
     make_overlapped_chunks,
 )
+from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id

 DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf"
 # Depending on the machine, this can get parsed a couple of ways
@ -53,6 +54,7 @@ class TestChunk:
     def test_chunk(self):
         chunk = Chunk(
             content="Example chunk content",
+            chunk_id=generate_chunk_id("test-doc", "Example chunk content"),
             metadata={"key": "value"},
             embedding=[0.1, 0.2, 0.3],
         )
@ -63,6 +65,7 @@ class TestChunk:
         chunk_no_embedding = Chunk(
             content="Example chunk content",
+            chunk_id=generate_chunk_id("test-doc", "Example chunk content"),
             metadata={"key": "value"},
         )
         assert chunk_no_embedding.embedding is None
@ -218,8 +221,8 @@ class TestVectorStoreWithIndex:
         )

         chunks = [
-            Chunk(content="Test 1", embedding=None, metadata={}),
-            Chunk(content="Test 2", embedding=None, metadata={}),
+            Chunk(content="Test 1", chunk_id=generate_chunk_id("test-doc", "Test 1"), embedding=None, metadata={}),
+            Chunk(content="Test 2", chunk_id=generate_chunk_id("test-doc", "Test 2"), embedding=None, metadata={}),
         ]

         mock_inference_api.openai_embeddings.return_value.data = [
@ -254,8 +257,18 @@ class TestVectorStoreWithIndex:
         )

         chunks = [
-            Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3], metadata={}),
-            Chunk(content="Test 2", embedding=[0.4, 0.5, 0.6], metadata={}),
+            Chunk(
+                content="Test 1",
+                chunk_id=generate_chunk_id("test-doc", "Test 1"),
+                embedding=[0.1, 0.2, 0.3],
+                metadata={},
+            ),
+            Chunk(
+                content="Test 2",
+                chunk_id=generate_chunk_id("test-doc", "Test 2"),
+                embedding=[0.4, 0.5, 0.6],
+                metadata={},
+            ),
         ]

         await vector_store_with_index.insert_chunks(chunks)
@ -279,25 +292,47 @@ class TestVectorStoreWithIndex:
         # Verify Chunk raises ValueError for invalid embedding type
         with pytest.raises(ValueError, match="Input should be a valid list"):
-            Chunk(content="Test 1", embedding="invalid_type", metadata={})
+            Chunk(
+                content="Test 1",
+                chunk_id=generate_chunk_id("test-doc", "Test 1"),
+                embedding="invalid_type",
+                metadata={},
+            )

         # Verify Chunk raises ValueError for invalid embedding type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
         with pytest.raises(ValueError, match="Input should be a valid list"):
             await vector_store_with_index.insert_chunks(
                 [
-                    Chunk(content="Test 1", embedding=None, metadata={}),
-                    Chunk(content="Test 2", embedding="invalid_type", metadata={}),
+                    Chunk(
+                        content="Test 1", chunk_id=generate_chunk_id("test-doc", "Test 1"), embedding=None, metadata={}
+                    ),
+                    Chunk(
+                        content="Test 2",
+                        chunk_id=generate_chunk_id("test-doc", "Test 2"),
+                        embedding="invalid_type",
+                        metadata={},
+                    ),
                 ]
             )

         # Verify Chunk raises ValueError for invalid embedding element type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
         with pytest.raises(ValueError, match=" Input should be a valid number, unable to parse string as a number "):
             await vector_store_with_index.insert_chunks(
-                Chunk(content="Test 1", embedding=[0.1, "string", 0.3], metadata={})
+                Chunk(
+                    content="Test 1",
+                    chunk_id=generate_chunk_id("test-doc", "Test 1"),
+                    embedding=[0.1, "string", 0.3],
+                    metadata={},
+                )
             )

         chunks_wrong_dim = [
-            Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3, 0.4], metadata={}),
+            Chunk(
+                content="Test 1",
+                chunk_id=generate_chunk_id("test-doc", "Test 1"),
+                embedding=[0.1, 0.2, 0.3, 0.4],
+                metadata={},
+            ),
         ]
         with pytest.raises(ValueError, match="has dimension 4, expected 3"):
             await vector_store_with_index.insert_chunks(chunks_wrong_dim)
@ -317,9 +352,14 @@ class TestVectorStoreWithIndex:
         )

         chunks = [
-            Chunk(content="Test 1", embedding=None, metadata={}),
-            Chunk(content="Test 2", embedding=[0.2, 0.2, 0.2], metadata={}),
-            Chunk(content="Test 3", embedding=None, metadata={}),
+            Chunk(content="Test 1", chunk_id=generate_chunk_id("test-doc", "Test 1"), embedding=None, metadata={}),
+            Chunk(
+                content="Test 2",
+                chunk_id=generate_chunk_id("test-doc", "Test 2"),
+                embedding=[0.2, 0.2, 0.2],
+                metadata={},
+            ),
+            Chunk(content="Test 3", chunk_id=generate_chunk_id("test-doc", "Test 3"), embedding=None, metadata={}),
         ]

         mock_inference_api.openai_embeddings.return_value.data = [