feat(tests): record responses for evals and telemetry tests (#2954)

Continuing with https://github.com/meta-llama/llama-stack/pull/2952. This also includes a "fix" to the inference-store-related tests: we now pull a large number of inference responses from the DB so that we always find the one we just wrote.
2025-12-10 03:30:58 +00:00 · 2025-07-29 15:46:21 -07:00 · 2025-07-29 15:46:21 -07:00 · 0ac503ec0d
commit 0ac503ec0d
parent 81c7d6fa2e
13 changed files with 881 additions and 41 deletions
--- a/tests/integration/recordings/responses/44fb9cf5875f.json
+++ b/tests/integration/recordings/responses/44fb9cf5875f.json
@@ -0,0 +1,39 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/generate",
+    "headers": {},
+    "body": {
+      "model": "llama3.2:3b-instruct-fp16",
+      "raw": true,
+      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nTest trace 1<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
+      "options": {
+        "temperature": 0.0
+      },
+      "stream": false
+    },
+    "endpoint": "/api/generate",
+    "model": "llama3.2:3b-instruct-fp16"
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.GenerateResponse",
+      "__data__": {
+        "model": "llama3.2:3b-instruct-fp16",
+        "created_at": "2025-07-29T21:56:49.398099Z",
+        "done": true,
+        "done_reason": "stop",
+        "total_duration": 2251794875,
+        "load_duration": 108848667,
+        "prompt_eval_count": 20,
+        "prompt_eval_duration": 82008917,
+        "eval_count": 51,
+        "eval_duration": 2060141416,
+        "response": "It seems like you're trying to test the system, but I'm not sure what specific functionality or feature you'd like to test. Could you please provide more context or clarify what you're looking for? I'll do my best to assist you!",
+        "thinking": null,
+        "context": null
+      }
+    },
+    "is_streaming": false
+  }
+}