From 0ac503ec0d4f468104efb93c1e4bfc236e7e2aef Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 29 Jul 2025 15:46:21 -0700 Subject: [PATCH] feat(tests): record responses for evals and telemetry tests (#2954) Continuing with https://github.com/meta-llama/llama-stack/pull/2952 This also includes a "fix" to inference store related tests so that we pull a large number of inference responses from the DB so as to always find the one we just wrote. --- .../inference/test_openai_completion.py | 4 +- tests/integration/recordings/index.sqlite | Bin 36864 -> 40960 bytes .../recordings/responses/04172112ffbb.json | 347 ++++++++++++++++++ .../recordings/responses/1b92be674e2a.json | 39 ++ .../recordings/responses/44fb9cf5875f.json | 39 ++ .../recordings/responses/4a3a4447b16b.json | 2 +- .../recordings/responses/7354ec181984.json | 39 ++ .../recordings/responses/7e6806cba34a.json | 257 +++++++++++++ .../recordings/responses/ae1c22f18ecc.json | 39 ++ .../recordings/responses/b14ff438ca99.json | 39 ++ .../recordings/responses/d0ac68cbde69.json | 39 +- .../recordings/responses/dac7a32e5db9.json | 39 ++ .../recordings/responses/ed9e9b34008d.json | 39 ++ 13 files changed, 881 insertions(+), 41 deletions(-) create mode 100644 tests/integration/recordings/responses/04172112ffbb.json create mode 100644 tests/integration/recordings/responses/1b92be674e2a.json create mode 100644 tests/integration/recordings/responses/44fb9cf5875f.json create mode 100644 tests/integration/recordings/responses/7354ec181984.json create mode 100644 tests/integration/recordings/responses/7e6806cba34a.json create mode 100644 tests/integration/recordings/responses/ae1c22f18ecc.json create mode 100644 tests/integration/recordings/responses/b14ff438ca99.json create mode 100644 tests/integration/recordings/responses/dac7a32e5db9.json create mode 100644 tests/integration/recordings/responses/ed9e9b34008d.json diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py index f0311715a..51a208b17 100644 --- a/tests/integration/inference/test_openai_completion.py +++ b/tests/integration/inference/test_openai_completion.py @@ -345,7 +345,7 @@ def test_inference_store(compat_client, client_with_models, text_model_id, strea response_id = response.id content = response.choices[0].message.content - responses = client.chat.completions.list() + responses = client.chat.completions.list(limit=1000) assert response_id in [r.id for r in responses.data] retrieved_response = client.chat.completions.retrieve(response_id) @@ -410,7 +410,7 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode response_id = response.id content = response.choices[0].message.content - responses = client.chat.completions.list() + responses = client.chat.completions.list(limit=1000) assert response_id in [r.id for r in responses.data] retrieved_response = client.chat.completions.retrieve(response_id) diff --git a/tests/integration/recordings/index.sqlite b/tests/integration/recordings/index.sqlite index ee73ce7565278ea41db53f70719a8e78723f6cd7..65cb5aed017eee9a12054db5e3705a2ecfe96a65 100644 GIT binary patch delta 2087 zcmcJQ&5KoK9LMju_uTh;&Yebs!6uUyNoD!GzuaubLdIZ%W+fqF^Smy?!~qqlh4WGn z38Ulr2eRB4EwoDuOLImdB0?&pvYJCaY+pA(wUg{fz=*@+`@F#oN6 zV}djWbL%TJ_1THaOtSUyCvZg*(ms3aLO!hp?@ty+a;0na6NNXj#^lh3cz5IXbXVF; zK2K(nR%yKPe#133#><6~tTnlz7O$(`SG-vHqVQ$gPX3TTIu_3)c|yFF##6zm=h|6k zp#V||Ca__YBWSgUKxzmzW{_J%FmptC?7h;I(P*jb1`rr*A@W{ptc~JOLtq%T7!vLQ 
z#MBAEB&3ESW2rHY9;LwrupTSt36Cyy{cy*EozV&t70g3y)B-M`6BtPXJmY{t104bw za*u@|oFi@|vJwgjVD!nV8%&8aP(Ue&3UM*#k!IR>D7h6(87;U2oKUNg)=p9^6mbX= z2(2+v%IHGZ57ZKA$9VXrFpvN!twcx?3g#5nA$X1fqM?AmLqZ_4*it7Mv=j(LQ1nsz zlsywgA7BwQc~oRESN<&HY;yf6*` zsLQx<*Pbz_y` z3ZsyYmXd_AK`?ZMDN1B8&;6mZO<;Fzk+bai;6{9olKOcUp zcyc=4Hk><>tG`+Mr|8S?l-I@Im0mCYQG2EOQ}u-kY)(}c9k0M<5rHfGlN*HC z*npyJO~#wQ$nzSoF%~h1GBizQ50I5%Xkz%qUdHx{EspgPs~ckxe<5Em?-9O7yr#US z6APUtHw9F%ls0K_Z2lKu!o$vVfuoVTDPVFzUniTPC6MVoIk*$b^qTw#%w%9-l$(5{ z11jo1xv;B~&(M+!Xul@QLI$>*+!H4AB}j_XDb9-~MaP8$P z{Hmmzm2uhTpGsYY6T2PTdX{1~1=fjNoSao0 zui4M?7jncfKVc5#`^}fbbZlc`AJb;7=GPpXU1N7MP0s0?$f!2?UtbQpft9hbm9fd> H!v1~$1b2Z9 diff --git a/tests/integration/recordings/responses/04172112ffbb.json b/tests/integration/recordings/responses/04172112ffbb.json new file mode 100644 index 000000000..739c74ffc --- /dev/null +++ b/tests/integration/recordings/responses/04172112ffbb.json @@ -0,0 +1,347 @@ +{ + "request": { + "method": "POST", + "url": "http://localhost:11434/api/generate", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "raw": true, + "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is 2 + 2?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nThe answer to 2 + 2 is 4.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nTell me a short joke<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "options": { + "temperature": 0.0 + }, + "stream": true + }, + "endpoint": "/api/generate", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.453648Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": "Here", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.498568Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": "'s", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.542162Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " one", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.583708Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": ":\n\n", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.624941Z", + "done": false, + "done_reason": null, + 
"total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": "What", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.666128Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " do", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.707429Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " you", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.748343Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " call", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.789526Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " a", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.830744Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " fake", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.871855Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " nood", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.913174Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": "le", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.954413Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": 
"?\n\n", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.995741Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": "An", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:44.036825Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " imp", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:44.078477Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": "asta", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:44.120217Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": "!", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:44.162092Z", + "done": true, + "done_reason": "stop", + "total_duration": 875451166, + "load_duration": 90922958, + "prompt_eval_count": 56, + "prompt_eval_duration": 69995084, + "eval_count": 18, + "eval_duration": 711865666, + "response": "", + "thinking": null, + "context": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/1b92be674e2a.json b/tests/integration/recordings/responses/1b92be674e2a.json new file mode 100644 index 000000000..38e2633c4 --- /dev/null +++ b/tests/integration/recordings/responses/1b92be674e2a.json @@ -0,0 +1,39 @@ +{ + "request": { + "method": "POST", + "url": "http://localhost:11434/api/generate", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "raw": true, + "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho is the CEO of Meta?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "options": { + "temperature": 0.0 + }, + "stream": false + }, + "endpoint": "/api/generate", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:09.951749Z", + "done": true, + "done_reason": "stop", + "total_duration": 1110064084, + "load_duration": 86978750, + "prompt_eval_count": 23, + "prompt_eval_duration": 71337125, + "eval_count": 24, + "eval_duration": 951124708, + "response": "Mark Zuckerberg is the founder, chairman and CEO of Meta, which he originally founded as Facebook in 2004.", + "thinking": null, + 
"context": null + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/44fb9cf5875f.json b/tests/integration/recordings/responses/44fb9cf5875f.json new file mode 100644 index 000000000..462bd3a0c --- /dev/null +++ b/tests/integration/recordings/responses/44fb9cf5875f.json @@ -0,0 +1,39 @@ +{ + "request": { + "method": "POST", + "url": "http://localhost:11434/api/generate", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "raw": true, + "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nTest trace 1<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "options": { + "temperature": 0.0 + }, + "stream": false + }, + "endpoint": "/api/generate", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:49.398099Z", + "done": true, + "done_reason": "stop", + "total_duration": 2251794875, + "load_duration": 108848667, + "prompt_eval_count": 20, + "prompt_eval_duration": 82008917, + "eval_count": 51, + "eval_duration": 2060141416, + "response": "It seems like you're trying to test the system, but I'm not sure what specific functionality or feature you'd like to test. Could you please provide more context or clarify what you're looking for? I'll do my best to assist you!", + "thinking": null, + "context": null + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/4a3a4447b16b.json b/tests/integration/recordings/responses/4a3a4447b16b.json index ba19718a4..cc2d61bda 100644 --- a/tests/integration/recordings/responses/4a3a4447b16b.json +++ b/tests/integration/recordings/responses/4a3a4447b16b.json @@ -14,7 +14,7 @@ "models": [ { "model": "nomic-embed-text:latest", - "modified_at": "2025-07-29T13:32:18.014755-07:00", + "modified_at": "2025-07-29T14:55:45.755906-07:00", "digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f", "size": 274302450, "details": { diff --git a/tests/integration/recordings/responses/7354ec181984.json b/tests/integration/recordings/responses/7354ec181984.json new file mode 100644 index 000000000..b76c85d93 --- /dev/null +++ b/tests/integration/recordings/responses/7354ec181984.json @@ -0,0 +1,39 @@ +{ + "request": { + "method": "POST", + "url": "http://localhost:11434/api/generate", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "raw": true, + "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the smallest country in the world?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "options": { + "temperature": 0.0 + }, + "stream": false + }, + "endpoint": "/api/generate", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:26.022261Z", + "done": true, + "done_reason": "stop", + "total_duration": 6323034167, + "load_duration": 107456542, + "prompt_eval_count": 25, + "prompt_eval_duration": 69246125, + "eval_count": 150, + "eval_duration": 6145775458, + "response": "The smallest country in the world is the Vatican City, which has a total area of approximately 0.44 km\u00b2 (0.17 sq mi). 
It is an independent city-state located within Rome, Italy, and is home to the Pope and the central government of the Catholic Church.\n\nTo put that into perspective, the Vatican City is smaller than a golf course! Despite its tiny size, it has its own government, currency, postal system, and even its own police force. It's also home to numerous iconic landmarks like St. Peter's Basilica and the Sistine Chapel.\n\nInterestingly, the Vatican City is not only the smallest country in the world but also the most densely populated, with a population of just over 800 people!", + "thinking": null, + "context": null + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/7e6806cba34a.json b/tests/integration/recordings/responses/7e6806cba34a.json new file mode 100644 index 000000000..23eae3ac9 --- /dev/null +++ b/tests/integration/recordings/responses/7e6806cba34a.json @@ -0,0 +1,257 @@ +{ + "request": { + "method": "POST", + "url": "http://localhost:11434/api/generate", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "raw": true, + "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is 2 + 2?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "options": { + "temperature": 0.0 + }, + "stream": true + }, + "endpoint": "/api/generate", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:42.546801Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": "The", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:42.588481Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " answer", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:42.630066Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " to", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:42.671027Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " ", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:42.712294Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + 
"response": "2", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:42.753866Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " +", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:42.795863Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " ", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:42.83722Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": "2", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:42.878343Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " is", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:42.919504Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": " ", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:42.960515Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": "4", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.001631Z", + "done": false, + "done_reason": null, + "total_duration": null, + "load_duration": null, + "prompt_eval_count": null, + "prompt_eval_duration": null, + "eval_count": null, + "eval_duration": null, + "response": ".", + "thinking": null, + "context": null + } + }, + { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:43.043353Z", + "done": true, + "done_reason": "stop", + "total_duration": 834188250, + "load_duration": 75182417, + "prompt_eval_count": 29, + "prompt_eval_duration": 261107458, + "eval_count": 13, + "eval_duration": 497358667, + "response": "", + "thinking": null, + "context": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/ae1c22f18ecc.json 
b/tests/integration/recordings/responses/ae1c22f18ecc.json new file mode 100644 index 000000000..b5b1604cc --- /dev/null +++ b/tests/integration/recordings/responses/ae1c22f18ecc.json @@ -0,0 +1,39 @@ +{ + "request": { + "method": "POST", + "url": "http://localhost:11434/api/generate", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "raw": true, + "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nTest trace 0<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "options": { + "temperature": 0.0 + }, + "stream": false + }, + "endpoint": "/api/generate", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:47.06444Z", + "done": true, + "done_reason": "stop", + "total_duration": 2620252041, + "load_duration": 196706333, + "prompt_eval_count": 20, + "prompt_eval_duration": 70100292, + "eval_count": 58, + "eval_duration": 2352865167, + "response": "I'm happy to help you with your test, but I don't see what kind of test we are testing. Could you please provide more context or clarify what kind of test you would like me to perform? Is it a programming test, a language proficiency test, or something else?", + "thinking": null, + "context": null + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/b14ff438ca99.json b/tests/integration/recordings/responses/b14ff438ca99.json new file mode 100644 index 000000000..d4d24a048 --- /dev/null +++ b/tests/integration/recordings/responses/b14ff438ca99.json @@ -0,0 +1,39 @@ +{ + "request": { + "method": "POST", + "url": "http://localhost:11434/api/generate", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "raw": true, + "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the currency of Japan?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "options": { + "temperature": 0.0 + }, + "stream": false + }, + "endpoint": "/api/generate", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:31.274826Z", + "done": true, + "done_reason": "stop", + "total_duration": 5223804916, + "load_duration": 61370666, + "prompt_eval_count": 23, + "prompt_eval_duration": 70195875, + "eval_count": 124, + "eval_duration": 5091701417, + "response": "The official currency of Japan is the Japanese yen (\u00a5). It is abbreviated as \"JPY\" and its symbol is \u00a5. The yen is divided into 100 sen, although the sen has been officially discontinued since 1967.\n\nYou can exchange your money for yen at banks, currency exchange offices, or use ATMs to withdraw cash from an ATM. 
Credit cards are also widely accepted in Japan, especially among major retailers and restaurants.\n\nIt's worth noting that some businesses may not accept foreign currencies other than US dollars, so it's a good idea to have some local currency on hand when traveling to Japan.", + "thinking": null, + "context": null + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/d0ac68cbde69.json b/tests/integration/recordings/responses/d0ac68cbde69.json index 9b639bf6b..b37962fb6 100644 --- a/tests/integration/recordings/responses/d0ac68cbde69.json +++ b/tests/integration/recordings/responses/d0ac68cbde69.json @@ -11,44 +11,7 @@ "body": { "__type__": "ollama._types.ProcessResponse", "__data__": { - "models": [ - { - "model": "llama-guard3:1b", - "name": "llama-guard3:1b", - "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b", - "expires_at": "2025-07-29T14:32:56.756471-07:00", - "size": 2770397184, - "size_vram": 2770397184, - "details": { - "parent_model": "", - "format": "gguf", - "family": "llama", - "families": [ - "llama" - ], - "parameter_size": "1.5B", - "quantization_level": "Q8_0" - } - }, - { - "model": "all-minilm:l6-v2", - "name": "all-minilm:l6-v2", - "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef", - "expires_at": "2025-07-29T13:38:34.021809-07:00", - "size": 590204928, - "size_vram": 590204928, - "details": { - "parent_model": "", - "format": "gguf", - "family": "bert", - "families": [ - "bert" - ], - "parameter_size": "23M", - "quantization_level": "F16" - } - } - ] + "models": [] } }, "is_streaming": false diff --git a/tests/integration/recordings/responses/dac7a32e5db9.json b/tests/integration/recordings/responses/dac7a32e5db9.json new file mode 100644 index 000000000..675eef3b0 --- /dev/null +++ b/tests/integration/recordings/responses/dac7a32e5db9.json @@ -0,0 +1,39 @@ +{ + "request": { + "method": "POST", + "url": "http://localhost:11434/api/generate", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "raw": true, + "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the capital of France?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "options": { + "temperature": 0.0 + }, + "stream": false + }, + "endpoint": "/api/generate", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:08.784695Z", + "done": true, + "done_reason": "stop", + "total_duration": 420724000, + "load_duration": 57238084, + "prompt_eval_count": 23, + "prompt_eval_duration": 72133167, + "eval_count": 8, + "eval_duration": 290696708, + "response": "The capital of France is Paris.", + "thinking": null, + "context": null + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/ed9e9b34008d.json b/tests/integration/recordings/responses/ed9e9b34008d.json new file mode 100644 index 000000000..36dda5ef0 --- /dev/null +++ b/tests/integration/recordings/responses/ed9e9b34008d.json @@ -0,0 +1,39 @@ +{ + "request": { + "method": "POST", + "url": "http://localhost:11434/api/generate", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "raw": true, + "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the largest planet in 
our solar system?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "options": { + "temperature": 0.0 + }, + "stream": false + }, + "endpoint": "/api/generate", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "ollama._types.GenerateResponse", + "__data__": { + "model": "llama3.2:3b-instruct-fp16", + "created_at": "2025-07-29T21:56:19.673198Z", + "done": true, + "done_reason": "stop", + "total_duration": 9704897750, + "load_duration": 74034167, + "prompt_eval_count": 26, + "prompt_eval_duration": 64475958, + "eval_count": 232, + "eval_duration": 9565656334, + "response": "The largest planet in our solar system is Jupiter. It is a gas giant, meaning it is primarily composed of hydrogen and helium gases. Jupiter has a diameter of approximately 142,984 kilometers (88,846 miles), which is more than 11 times the diameter of Earth.\n\nJupiter is not only the largest planet in terms of size, but also the most massive planet in our solar system, with a mass that is more than 318 times that of Earth. It has a thick atmosphere and a strong magnetic field, and is known for its distinctive banded appearance, which is caused by strong winds in the upper atmosphere.\n\nJupiter's massive size and gravitational pull have a significant impact on the surrounding space, including the orbits of nearby planets and asteroids. Its moons are also notable, with four large ones: Io, Europa, Ganymede, and Callisto, which are known as the Galilean moons due to their discovery by Galileo Galilei in 1610.\n\nJupiter is a fascinating planet that continues to be studied by astronomers and space agencies around the world, offering insights into the formation and evolution of our solar system.", + "thinking": null, + "context": null + } + }, + "is_streaming": false + } +}
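
A minimal sketch of the inference-store fix described in the commit message above, for context: the updated tests in test_openai_completion.py now call `client.chat.completions.list(limit=1000)` instead of `client.chat.completions.list()` so the just-written completion is always found. The snippet below mirrors those calls; the base URL, API key, and model name are illustrative assumptions, not values taken from this PR.

    from openai import OpenAI

    # Hypothetical client setup; any OpenAI-compatible endpoint backed by the
    # inference store behaves the same way for this check.
    client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

    # Write one chat completion; the server records it in the inference store.
    response = client.chat.completions.create(
        model="llama3.2:3b-instruct-fp16",
        messages=[{"role": "user", "content": "What is 2 + 2?"}],
    )
    response_id = response.id

    # With the default page size, a store that already holds many rows can return
    # a page that does not include the row just written, making the membership
    # check flaky. Requesting up to 1000 entries, as the updated tests do, keeps
    # the check reliable without depending on result ordering.
    responses = client.chat.completions.list(limit=1000)
    assert response_id in [r.id for r in responses.data]

    # Lookup by id is unaffected by pagination.
    retrieved = client.chat.completions.retrieve(response_id)
    assert retrieved.id == response_id

Passing a large limit is a pragmatic test-only workaround; it trades a slightly larger list response for determinism when other recorded requests have already populated the store.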