feat(tests): record responses for evals and telemetry tests (#2954)

Continuing with https://github.com/meta-llama/llama-stack/pull/2952. This also includes a "fix" to inference-store-related tests so that we pull a large number of inference responses from the DB so as to always find the one we just wrote.
2025-12-09 03:19:20 +00:00 · 2025-07-29 15:46:21 -07:00 · 2025-07-29 15:46:21 -07:00 · 0ac503ec0d
commit 0ac503ec0d
parent 81c7d6fa2e
13 changed files with 881 additions and 41 deletions
--- a/tests/integration/recordings/responses/dac7a32e5db9.json
+++ b/tests/integration/recordings/responses/dac7a32e5db9.json
@@ -0,0 +1,39 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/generate",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "raw": true,
+      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the capital of France?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
+      "options": {
+        "temperature": 0.0
+      },
+      "stream": false
+    },
+    "endpoint": "/api/generate",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.GenerateResponse",
+      "__data__": {
+        "model": "llama3.2:3b-instruct-fp16",
+        "created_at": "2025-07-29T21:56:08.784695Z",
+        "done": true,
+        "done_reason": "stop",
+        "total_duration": 420724000,
+        "load_duration": 57238084,
+        "prompt_eval_count": 23,
+        "prompt_eval_duration": 72133167,
+        "eval_count": 8,
+        "eval_duration": 290696708,
+        "response": "The capital of France is Paris.",
+        "thinking": null,
+        "context": null
+      }
+    },
+    "is_streaming": false
+  }
+}