mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-24 16:57:21 +00:00
Uses test_id in request hashes and test-scoped subdirectories to prevent cross-test contamination. Model list endpoints exclude test_id to enable merging recordings from different servers. Additionally, this PR adds a `record-if-missing` mode (which we will use instead of `record` which records everything) which is very useful. 🤖 Co-authored with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude <noreply@anthropic.com>
40 lines
1.2 KiB
JSON
40 lines
1.2 KiB
JSON
{
|
|
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_non_streaming_suffix[txt=ollama/llama3.2:3b-instruct-fp16-inference:completion:suffix]",
|
|
"request": {
|
|
"method": "POST",
|
|
"url": "http://localhost:11434/api/ps",
|
|
"headers": {},
|
|
"body": {},
|
|
"endpoint": "/api/ps",
|
|
"model": ""
|
|
},
|
|
"response": {
|
|
"body": {
|
|
"__type__": "ollama._types.ProcessResponse",
|
|
"__data__": {
|
|
"models": [
|
|
{
|
|
"model": "llama3.2:3b-instruct-fp16",
|
|
"name": "llama3.2:3b-instruct-fp16",
|
|
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
|
|
"expires_at": "2025-10-03T20:05:44.851615-07:00",
|
|
"size": 7919570944,
|
|
"size_vram": 7919570944,
|
|
"details": {
|
|
"parent_model": "",
|
|
"format": "gguf",
|
|
"family": "llama",
|
|
"families": [
|
|
"llama"
|
|
],
|
|
"parameter_size": "3.2B",
|
|
"quantization_level": "F16"
|
|
},
|
|
"context_length": 4096
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"is_streaming": false
|
|
}
|
|
}
|