feat(test): record agents, safety and vector_io integration tests (#2952)

Continue to build on top of https://github.com/meta-llama/llama-stack/pull/2941

## Test Plan

Run the server with `LLAMA_STACK_TEST_INFERENCE_MODE=record` and then run the integration tests with `--stack-config=server:starter`. Then restart the server with `LLAMA_STACK_TEST_INFERENCE_MODE=replay` and re-run the tests. Verify that no request hits Ollama at any point.
2025-12-10 03:30:58 +00:00 · 2025-07-29 14:02:14 -07:00 · 2025-07-29 14:02:14 -07:00 · 072d20a124
commit 072d20a124
parent 2d1ab3ca55
59 changed files with 22727 additions and 9 deletions
--- a/tests/integration/recordings/responses/d0ac68cbde69.json
+++ b/tests/integration/recordings/responses/d0ac68cbde69.json
@ -13,12 +13,12 @@
      "__data__": {
        "models": [
          {
-            "model": "llama3.2:3b-instruct-fp16",
-            "name": "llama3.2:3b-instruct-fp16",
-            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
-            "expires_at": "2025-07-29T11:53:06.458806-07:00",
-            "size": 8581748736,
-            "size_vram": 8581748736,
+            "model": "llama-guard3:1b",
+            "name": "llama-guard3:1b",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "expires_at": "2025-07-29T14:32:56.756471-07:00",
+            "size": 2770397184,
+            "size_vram": 2770397184,
            "details": {
              "parent_model": "",
              "format": "gguf",
@ -26,7 +26,25 @@
              "families": [
                "llama"
              ],
-              "parameter_size": "3.2B",
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            }
+          },
+          {
+            "model": "all-minilm:l6-v2",
+            "name": "all-minilm:l6-v2",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "expires_at": "2025-07-29T13:38:34.021809-07:00",
+            "size": 590204928,
+            "size_vram": 590204928,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
              "quantization_level": "F16"
            }
          }