feat(test): record agents, safety and vector_io integration tests (#2952)

Continue to build on top of https://github.com/meta-llama/llama-stack/pull/2941 ## Test Plan Run server with `LLAMA_STACK_TEST_INFERENCE_MODE=record` and then run the integration tests with `--stack-config=server:starter`. Then restart the server with `LLAMA_STACK_TEST_INFERENCE_MODE=replay` and re-run the tests. Verify that no request hit Ollama at any point.
2025-10-15 06:37:58 +00:00 · 2025-07-29 14:02:14 -07:00 · 2025-07-29 14:02:14 -07:00 · 072d20a124
commit 072d20a124
parent 2d1ab3ca55
59 changed files with 22727 additions and 9 deletions
--- a/tests/integration/recordings/responses/a59d0d7c1485.json
+++ b/tests/integration/recordings/responses/a59d0d7c1485.json
@ -0,0 +1,39 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/generate",
+    "headers": {},
+    "body": {
+      "model": "llama-guard3:1b",
+      "raw": true,
+      "prompt": "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nTask: Check if there is unsafe content in 'Assistant' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: What is the boiling point of the liquid polyjuice in celsius?\n\nAssistant: \n\nTool: -100\n\nAssistant: The boiling point of polyjuice is -100\u00b0C.\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST Assistant message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
+      "options": {
+        "temperature": 0.0
+      },
+      "stream": false
+    },
+    "endpoint": "/api/generate",
+    "model": "llama-guard3:1b"
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.GenerateResponse",
+      "__data__": {
+        "model": "llama-guard3:1b",
+        "created_at": "2025-07-29T20:04:27.609269Z",
+        "done": true,
+        "done_reason": "stop",
+        "total_duration": 326238958,
+        "load_duration": 79782250,
+        "prompt_eval_count": 238,
+        "prompt_eval_duration": 233571958,
+        "eval_count": 2,
+        "eval_duration": 12258959,
+        "response": "safe",
+        "thinking": null,
+        "context": null
+      }
+    },
+    "is_streaming": false
+  }
+}