feat(ci): use replay mode, setup ollama if specific label exists on PR (#2955)

This PR makes setting up Ollama optional for CI. By default, we use `replay` mode for inference requests and use the stored results from the `tests/integration/recordings/` directory. Every so often, users will update tests in a way that requires us to re-record. To do this, we check for the existence of a `re-record-tests` label on the PR. If detected:

- ollama is spun up
- inference mode is set to record
- after the tests are done, if any new changes are detected, they are pushed back to the PR

## Test Plan

This is GitHub CI. Gotta test it live.
2025-12-03 18:00:36 +00:00 · 2025-07-29 16:50:26 -07:00 · 2025-07-29 16:50:26 -07:00 · b237df8f18
commit b237df8f18
parent 0ac503ec0d
11 changed files with 1519 additions and 13 deletions
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@ -7,6 +7,7 @@ on:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    types: [opened, synchronize, reopened, labeled, unlabeled]
    paths:
      - 'llama_stack/**'
      - 'tests/**'
@ -39,6 +40,8 @@ jobs:
    runs-on: ubuntu-latest
    outputs:
      test-type: ${{ steps.generate-matrix.outputs.test-type }}
      rerecord-tests: ${{ steps.check-rerecord-tests.outputs.rerecord-tests }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@ -52,10 +55,30 @@ jobs:
            sort | jq -R -s -c 'split("\n")[:-1]')
          echo "test-type=$TEST_TYPES" >> $GITHUB_OUTPUT
      # Publishes the step output `rerecord-tests` ("true" or "false") depending on
      # whether the triggering pull request carries the `re-record-tests` label.
      - name: Check if re-record-tests label exists
        id: check-rerecord-tests
        run: |
          # The expression is rendered to the literal text "true" or "false"
          # before the shell runs.
          HAS_RERECORD_LABEL="${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}"
          case "$HAS_RERECORD_LABEL" in
            true)
              echo "rerecord-tests=true" >> $GITHUB_OUTPUT
              ;;
            *)
              echo "rerecord-tests=false" >> $GITHUB_OUTPUT
              ;;
          esac
  test-matrix:
    needs: discover-tests
    runs-on: ubuntu-latest
    permissions:
      # Set write permissions since we might need to commit recordings
      contents: write
      pull-requests: write
    env:
      # Create reusable variable for the re-record tests condition
      SHOULD_RECORD: ${{ needs.discover-tests.outputs.rerecord-tests == 'true' }}
      # TODO: set up another var to track whether we need ollama or not
      # not every matrix type needs ollama
    strategy:
      fail-fast: false
      matrix:
@ -74,6 +97,16 @@ jobs:
            test-type: tool_runtime
    steps:
      # Diagnostic step: prints the matrix coordinates of this job together with
      # the re-record flag (both the job-level env value and the raw output of
      # the discover-tests job) so the chosen configuration is visible in the log.
      - name: Debug
        run: |
          echo "test-type: ${{ matrix.test-type }}"
          echo "client-type: ${{ matrix.client-type }}"
          echo "provider: ${{ matrix.provider }}"
          echo "python-version: ${{ matrix.python-version }}"
          echo "client-version: ${{ matrix.client-version }}"
          echo "SHOULD_RECORD: ${{ env.SHOULD_RECORD }}"
          echo "rerecord-tests: ${{ needs.discover-tests.outputs.rerecord-tests }}"
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@ -84,7 +117,7 @@ jobs:
          client-version: ${{ matrix.client-version }}
      - name: Setup ollama
-        if: ${{ matrix.provider == 'ollama' }}
+        if: ${{ matrix.provider == 'ollama' && env.SHOULD_RECORD == 'true' }}
        uses: ./.github/actions/setup-ollama
      - name: Setup vllm
@ -116,6 +149,14 @@ jobs:
          fi
          EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
          export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings"
          if [ "$SHOULD_RECORD" == "true" ]; then
            export LLAMA_STACK_TEST_INFERENCE_MODE="record"
          else
            export LLAMA_STACK_TEST_INFERENCE_MODE="replay"
          fi
          if [ "${{ matrix.provider }}" == "ollama" ]; then
            export OLLAMA_URL="http://0.0.0.0:11434"
            export TEXT_MODEL=ollama/llama3.2:3b-instruct-fp16
@ -129,7 +170,6 @@ jobs:
            EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
          fi
          uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
            -k "not( ${EXCLUDE_TESTS} )" \
            --text-model=$TEXT_MODEL \
@ -137,6 +177,20 @@ jobs:
            --color=yes ${EXTRA_PARAMS} \
            --capture=tee-sys | tee pytest-${{ matrix.test-type }}.log
      # When running in record mode, commit any new or modified recordings and
      # push them back to the pull request's head branch.
      - name: Update the PR if tests/integration/recordings/ has changed
        if: ${{ env.SHOULD_RECORD == 'true' }}
        env:
          # The branch name is attacker-controllable on a PR; pass it through the
          # environment instead of interpolating it into the script text so it
          # can never be parsed as shell code.
          PR_HEAD_REF: ${{ github.head_ref }}
        run: |
          # Stage first: a plain `git diff` only looks at tracked files, so
          # brand-new recording files would otherwise go unnoticed.
          git add tests/integration/recordings/
          if ! git diff --cached --quiet -- tests/integration/recordings/; then
            echo "Updating PR with updated recordings"
            git config --local user.email "github-actions[bot]@users.noreply.github.com"
            git config --local user.name "github-actions[bot]"
            git commit -m "Update recordings from integration tests"
            git push origin "HEAD:${PR_HEAD_REF}"
          else
            echo "No changes to recordings detected"
          fi
      - name: Check Storage and Memory Available After Tests
        if: ${{ always() }}
        run: |
@ -144,13 +198,13 @@ jobs:
          df -h
      # Dump the container logs of the inference servers. Containers are only
      # started in record mode, hence the SHOULD_RECORD guard; `always()` keeps
      # the step running after a test failure, which is when the logs matter most
      # (without a status function GitHub implicitly applies `success()`).
      - name: Write inference logs to file
-        if: ${{ always() }}
+        if: ${{ always() && env.SHOULD_RECORD == 'true' }}
        run: |
          sudo docker logs ollama > ollama.log || true
          sudo docker logs vllm > vllm.log || true
      - name: Upload all logs to artifacts
-        if: ${{ always() }}
+        if: ${{ env.SHOULD_RECORD == 'true' }}
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.provider }}-${{ matrix.client-type }}-${{ matrix.test-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
--- a/tests/integration/recordings/index.sqlite
+++ b/tests/integration/recordings/index.sqlite
--- a/tests/integration/recordings/responses/0ff78129bb3a.json
+++ b/tests/integration/recordings/responses/0ff78129bb3a.json
@ -0,0 +1,167 @@
 {
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n    {\n        \"name\": \"greet_everyone\",\n        \"description\": \"\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"url\"],\n            \"properties\": {\n                \"url\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                }\n            }\n        }\n    },\n    {\n        \"name\": \"get_boiling_point\",\n        \"description\": \"\nReturns the boiling point of a liquid in Celsius or Fahrenheit.\n\n:param liquid_name: The name of the liquid\n:param celsius: Whether to return the boiling point in Celsius\n:return: The boiling point of the liquid in Celcius or Fahrenheit\n\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"liquid_name\", \"celsius\"],\n            \"properties\": {\n                \"liquid_name\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                },\n                \"celsius\": {\n                    \"type\": \"boolean\",\n                    \"description\": \"\"\n                
}\n            }\n        }\n    }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[greet_everyone(url=\"world\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.143606Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "How",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.186151Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " can",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.229036Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " I",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.271516Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " assist",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.316272Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " you",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.361005Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " further",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.404689Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "?",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.447699Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 456939083,
          "load_duration": 79653292,
          "prompt_eval_count": 471,
          "prompt_eval_duration": 71724667,
          "eval_count": 8,
          "eval_duration": 304859000,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
 }
--- a/tests/integration/recordings/responses/4597743bcd2a.json
+++ b/tests/integration/recordings/responses/4597743bcd2a.json
@ -0,0 +1,185 @@
 {
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n    {\n        \"name\": \"greet_everyone\",\n        \"description\": \"\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"url\"],\n            \"properties\": {\n                \"url\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                }\n            }\n        }\n    },\n    {\n        \"name\": \"get_boiling_point\",\n        \"description\": \"\nReturns the boiling point of a liquid in Celsius or Fahrenheit.\n\n:param liquid_name: The name of the liquid\n:param celsius: Whether to return the boiling point in Celsius\n:return: The boiling point of the liquid in Celcius or Fahrenheit\n\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"liquid_name\", \"celsius\"],\n            \"properties\": {\n                \"liquid_name\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                },\n                \"celsius\": {\n                    \"type\": \"boolean\",\n                    \"description\": \"\"\n                
}\n            }\n        }\n    }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:17.476678Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "[g",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:17.520346Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "reet",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:17.563375Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "_every",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:17.606256Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "one",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:17.649215Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "(url",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:17.692049Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "=\"",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:17.734316Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "world",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:17.776615Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "\")]",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:17.819266Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 5629478417,
          "load_duration": 4092162625,
          "prompt_eval_count": 448,
          "prompt_eval_duration": 1191158583,
          "eval_count": 9,
          "eval_duration": 343915792,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
 }
--- a/tests/integration/recordings/responses/4a3a4447b16b.json
+++ b/tests/integration/recordings/responses/4a3a4447b16b.json
@ -14,7 +14,7 @@
        "models": [
          {
            "model": "nomic-embed-text:latest",
-            "modified_at": "2025-07-29T14:55:45.755906-07:00",
+            "modified_at": "2025-07-29T16:46:26.304701-07:00",
            "digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f",
            "size": 274302450,
            "details": {
--- a/tests/integration/recordings/responses/9c28ec9ac338.json
+++ b/tests/integration/recordings/responses/9c28ec9ac338.json
@ -0,0 +1,347 @@
 {
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n    {\n        \"name\": \"greet_everyone\",\n        \"description\": \"\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"url\"],\n            \"properties\": {\n                \"url\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                }\n            }\n        }\n    },\n    {\n        \"name\": \"get_boiling_point\",\n        \"description\": \"\n        Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n        :param liquid_name: The name of the liquid\n        :param celsius: Whether to return the boiling point in Celsius\n        :return: The boiling point of the liquid in Celcius or Fahrenheit\n        \",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"liquid_name\", \"celsius\"],\n            \"properties\": {\n                \"liquid_name\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                },\n                \"celsius\": {\n                    \"type\": \"boolean\",\n                    
\"description\": \"\"\n                }\n            }\n        }\n    }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[greet_everyone(url=\"world\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHow can I assist you further?<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the boiling point of polyjuice? Use tools to answer.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.316207Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "[",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.358611Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "get",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.401272Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "_bo",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.444321Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "iling",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.48795Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "_point",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.530158Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "(",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.573318Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "liquid",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.616297Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "_name",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.659527Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "='",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.702422Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "poly",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.745894Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "ju",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.788811Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "ice",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.831618Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "',",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.874469Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " c",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.917372Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "elsius",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.960558Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "=True",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:37.004223Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": ")]",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:37.046563Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 845522667,
          "load_duration": 47784875,
          "prompt_eval_count": 511,
          "prompt_eval_duration": 66135292,
          "eval_count": 18,
          "eval_duration": 730999291,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
 }
--- a/tests/integration/recordings/responses/bd356b27a085.json
+++ b/tests/integration/recordings/responses/bd356b27a085.json
@ -0,0 +1,167 @@
 {
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n    {\n        \"name\": \"greet_everyone\",\n        \"description\": \"\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"url\"],\n            \"properties\": {\n                \"url\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                }\n            }\n        }\n    },\n    {\n        \"name\": \"get_boiling_point\",\n        \"description\": \"\n        Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n        :param liquid_name: The name of the liquid\n        :param celsius: Whether to return the boiling point in Celsius\n        :return: The boiling point of the liquid in Celcius or Fahrenheit\n        \",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"liquid_name\", \"celsius\"],\n            \"properties\": {\n                \"liquid_name\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                },\n                \"celsius\": {\n                    \"type\": \"boolean\",\n                    
\"description\": \"\"\n                }\n            }\n        }\n    }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[greet_everyone(url=\"world\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.850399Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "How",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.89419Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " can",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.938049Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " I",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.980392Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " assist",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.023004Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " you",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.065467Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " further",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.108189Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "?",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:36.150902Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 468910417,
          "load_duration": 93969000,
          "prompt_eval_count": 479,
          "prompt_eval_duration": 72596750,
          "eval_count": 8,
          "eval_duration": 301590375,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
 }
--- a/tests/integration/recordings/responses/c7582fa7c2c4.json
+++ b/tests/integration/recordings/responses/c7582fa7c2c4.json
@ -0,0 +1,347 @@
 {
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n    {\n        \"name\": \"greet_everyone\",\n        \"description\": \"\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"url\"],\n            \"properties\": {\n                \"url\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                }\n            }\n        }\n    },\n    {\n        \"name\": \"get_boiling_point\",\n        \"description\": \"\nReturns the boiling point of a liquid in Celsius or Fahrenheit.\n\n:param liquid_name: The name of the liquid\n:param celsius: Whether to return the boiling point in Celsius\n:return: The boiling point of the liquid in Celcius or Fahrenheit\n\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"liquid_name\", \"celsius\"],\n            \"properties\": {\n                \"liquid_name\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                },\n                \"celsius\": {\n                    \"type\": \"boolean\",\n                    \"description\": \"\"\n                
}\n            }\n        }\n    }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[greet_everyone(url=\"world\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nHello, world!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHow can I assist you further?<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the boiling point of polyjuice? Use tools to answer.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.64197Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "[",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.687885Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "get",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.73112Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "_bo",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.774191Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "iling",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.816695Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "_point",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.859121Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "(",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.901585Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "liquid",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.943788Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "_name",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:18.986429Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "='",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:19.029894Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "poly",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:19.073113Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "ju",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:19.116671Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "ice",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:19.159456Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "',",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:19.203354Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " c",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:19.246192Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "elsius",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:19.290499Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "=True",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:19.334562Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": ")]",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:26:19.380415Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 881889250,
          "load_duration": 69966916,
          "prompt_eval_count": 503,
          "prompt_eval_duration": 70368167,
          "eval_count": 18,
          "eval_duration": 740885458,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
 }
--- a/tests/integration/recordings/responses/ef757a75ed08.json
+++ b/tests/integration/recordings/responses/ef757a75ed08.json
@ -0,0 +1,185 @@
 {
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n    {\n        \"name\": \"greet_everyone\",\n        \"description\": \"\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"url\"],\n            \"properties\": {\n                \"url\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                }\n            }\n        }\n    },\n    {\n        \"name\": \"get_boiling_point\",\n        \"description\": \"\n        Returns the boiling point of a liquid in Celsius or Fahrenheit.\n\n        :param liquid_name: The name of the liquid\n        :param celsius: Whether to return the boiling point in Celsius\n        :return: The boiling point of the liquid in Celcius or Fahrenheit\n        \",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"liquid_name\", \"celsius\"],\n            \"properties\": {\n                \"liquid_name\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                },\n                \"celsius\": {\n                    \"type\": \"boolean\",\n                    
\"description\": \"\"\n                }\n            }\n        }\n    }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.212563Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "[g",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.254896Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "reet",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.297152Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "_every",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.339477Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "one",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.382245Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "(url",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.423387Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "=\"",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.465286Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "world",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.507249Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "\")]",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T23:46:35.549072Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 5519843458,
          "load_duration": 4110366375,
          "prompt_eval_count": 456,
          "prompt_eval_duration": 1070783708,
          "eval_count": 9,
          "eval_duration": 337120750,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
 }
--- a/tests/integration/recordings/responses/f3c3afbd9b7e.json
+++ b/tests/integration/recordings/responses/f3c3afbd9b7e.json
@ -0,0 +1,59 @@
 {
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:1b",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n    {\n        \"name\": \"greet_everyone\",\n        \"description\": \"\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"url\"],\n            \"properties\": {\n                \"url\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                }\n            }\n        }\n    },\n    {\n        \"name\": \"get_boiling_point\",\n        \"description\": \"\nReturns the boiling point of a liquid in Celsius or Fahrenheit.\n\n:param liquid_name: The name of the liquid\n:param celsius: Whether to return the boiling point in Celsius\n:return: The boiling point of the liquid in Celcius or Fahrenheit\n\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"liquid_name\", \"celsius\"],\n            \"properties\": {\n                \"liquid_name\": {\n                    \"type\": \"string\",\n                    \"description\": \"\"\n                },\n                \"celsius\": {\n                    \"type\": \"boolean\",\n                    \"description\": \"\"\n                
}\n            }\n        }\n    }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSay hi to the world. Use tools to do so.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:1b"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:1b",
          "created_at": "2025-07-29T23:23:09.553247Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "Hi",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:1b",
          "created_at": "2025-07-29T23:23:09.564069Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 2125493250,
          "load_duration": 1610279708,
          "prompt_eval_count": 448,
          "prompt_eval_duration": 502413125,
          "eval_count": 2,
          "eval_duration": 11573709,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
 }
--- a/tests/integration/tool_runtime/test_mcp.py
+++ b/tests/integration/tool_runtime/test_mcp.py
@ -10,7 +10,6 @@ import pytest
 from llama_stack_client import Agent
 from llama_stack import LlamaStackAsLibraryClient
-from llama_stack.apis.models import ModelType
 from llama_stack.distribution.datatypes import AuthenticationRequiredError
 AUTH_TOKEN = "test-token"
@ -24,7 +23,7 @@ def mcp_server():
        yield mcp_server_info
-def test_mcp_invocation(llama_stack_client, mcp_server):
+def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server):
    if not isinstance(llama_stack_client, LlamaStackAsLibraryClient):
        pytest.skip("The local MCP server only reliably reachable from library client.")
@ -69,14 +68,10 @@ def test_mcp_invocation(llama_stack_client, mcp_server):
    assert content[0].type == "text"
    assert content[0].text == "Hello, world!"
-    models = [
+    print(f"Using model: {text_model_id}")
-        m for m in llama_stack_client.models.list() if m.model_type == ModelType.llm and "guard" not in m.identifier
-    ]
-    model_id = models[0].identifier
-    print(f"Using model: {model_id}")
    agent = Agent(
        client=llama_stack_client,
-        model=model_id,
+        model=text_model_id,
        instructions="You are a helpful assistant.",
        tools=[test_toolgroup_id],
    )