Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-04 04:04:14 +00:00.
Continue to build on top of https://github.com/meta-llama/llama-stack/pull/2941.

## Test Plan

Run the server with `LLAMA_STACK_TEST_INFERENCE_MODE=record` and run the integration tests with `--stack-config=server:starter`. Then restart the server with `LLAMA_STACK_TEST_INFERENCE_MODE=replay` and re-run the tests, verifying that no requests hit Ollama at any point.
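As a rough sketch of the mode switch this plan exercises: only the env var name and the record/replay semantics come from the test plan; the handler shape, function names, and on-disk layout below are assumptions, not the PR's actual code.

```python
# Rough sketch of a record/replay switch for inference calls.
# Only LLAMA_STACK_TEST_INFERENCE_MODE is taken from the test plan;
# everything else here is illustrative.
import json
import os
from pathlib import Path

MODE = os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "live")
RECORDINGS = Path("recordings")  # hypothetical fixture directory


def handle(endpoint: str, body: dict, forward) -> dict:
    path = RECORDINGS / (endpoint.strip("/").replace("/", "_") + ".json")
    if MODE == "replay":
        # Serve the stored response; Ollama is never contacted.
        return json.loads(path.read_text())["response"]["body"]
    response = forward(endpoint, body)  # live request to Ollama
    if MODE == "record":
        RECORDINGS.mkdir(exist_ok=True)
        path.write_text(json.dumps({
            "request": {"endpoint": endpoint, "body": body},
            "response": {"body": response, "is_streaming": False},
        }, indent=2))
    return response
```

The fixture below is one such recording, capturing a `POST /api/ps` exchange with Ollama.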
56 lines · 1.5 KiB · JSON
```json
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/ps",
    "headers": {},
    "body": {},
    "endpoint": "/api/ps",
    "model": ""
  },
  "response": {
    "body": {
      "__type__": "ollama._types.ProcessResponse",
      "__data__": {
        "models": [
          {
            "model": "llama-guard3:1b",
            "name": "llama-guard3:1b",
            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
            "expires_at": "2025-07-29T14:32:56.756471-07:00",
            "size": 2770397184,
            "size_vram": 2770397184,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "llama",
              "families": [
                "llama"
              ],
              "parameter_size": "1.5B",
              "quantization_level": "Q8_0"
            }
          },
          {
            "model": "all-minilm:l6-v2",
            "name": "all-minilm:l6-v2",
            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
            "expires_at": "2025-07-29T13:38:34.021809-07:00",
            "size": 590204928,
            "size_vram": 590204928,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "bert",
              "families": [
                "bert"
              ],
              "parameter_size": "23M",
              "quantization_level": "F16"
            }
          }
        ]
      }
    },
    "is_streaming": false
  }
}
```
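The `__type__`/`__data__` envelope suggests the replay path rehydrates the original typed client response from the stored dict. A minimal sketch of that step, assuming the `ollama._types` classes are pydantic v2 models and an illustrative filename; this is not the actual llama-stack replay code:

```python
# Rehydrate a typed client response from a recording like the one above.
# Resolves "__type__" to a class and validates "__data__" against it.
import importlib
import json


def rehydrate(body):
    """Rebuild a typed object from its serialized {__type__, __data__} form."""
    if isinstance(body, dict) and "__type__" in body:
        module_name, _, class_name = body["__type__"].rpartition(".")
        cls = getattr(importlib.import_module(module_name), class_name)
        return cls.model_validate(body["__data__"])  # assumes pydantic v2
    return body


with open("api_ps.json") as f:  # hypothetical path to the recording above
    recording = json.load(f)

response = rehydrate(recording["response"]["body"])
print([m.model for m in response.models])  # ['llama-guard3:1b', 'all-minilm:l6-v2']
```

Storing the fully qualified type name alongside the data is what lets the replayed response be indistinguishable from a live `ollama` client object, so tests exercise the same code paths in both modes.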