feat(test): record agents, safety and vector_io integration tests (#2952)

Continue to build on top of
https://github.com/meta-llama/llama-stack/pull/2941

## Test Plan

Run the server with `LLAMA_STACK_TEST_INFERENCE_MODE=record` and then run
the integration tests with `--stack-config=server:starter`. Then restart
the server with `LLAMA_STACK_TEST_INFERENCE_MODE=replay` and re-run the
tests. Verify that no request hits Ollama at any point.
This commit is contained in:
Ashwin Bharambe 2025-07-29 14:02:14 -07:00 committed by GitHub
parent 2d1ab3ca55
commit 072d20a124
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
59 changed files with 22727 additions and 9 deletions

View file

@ -13,12 +13,12 @@
"__data__": {
"models": [
{
"model": "llama3.2:3b-instruct-fp16",
"name": "llama3.2:3b-instruct-fp16",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"expires_at": "2025-07-29T11:53:06.458806-07:00",
"size": 8581748736,
"size_vram": 8581748736,
"model": "llama-guard3:1b",
"name": "llama-guard3:1b",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"expires_at": "2025-07-29T14:32:56.756471-07:00",
"size": 2770397184,
"size_vram": 2770397184,
"details": {
"parent_model": "",
"format": "gguf",
@ -26,7 +26,25 @@
"families": [
"llama"
],
"parameter_size": "3.2B",
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
},
{
"model": "all-minilm:l6-v2",
"name": "all-minilm:l6-v2",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"expires_at": "2025-07-29T13:38:34.021809-07:00",
"size": 590204928,
"size_vram": 590204928,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
}