mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-15 22:18:00 +00:00
feat(tests): record responses for evals and telemetry tests (#2954)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Integration Tests / discover-tests (push) Successful in 8s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 6s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 10s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 10s
Python Package Build Test / build (3.12) (push) Failing after 1s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 8s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 9s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 11s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 5s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 9s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 7s
Test Llama Stack Build / generate-matrix (push) Successful in 7s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 7s
Test Llama Stack Build / build-single-provider (push) Failing after 10s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Test External API and Providers / test-external (venv) (push) Failing after 10s
Test Llama Stack Build / build (push) Failing after 8s
Integration Tests / test-matrix (push) Failing after 9s
Unit Tests / unit-tests (3.13) (push) Failing after 11s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 29s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Failing after 26s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 39s
Python Package Build Test / build (3.13) (push) Failing after 38s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 41s
Pre-commit / pre-commit (push) Successful in 2m2s
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Integration Tests / discover-tests (push) Successful in 8s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 6s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 10s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 10s
Python Package Build Test / build (3.12) (push) Failing after 1s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 8s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 9s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 11s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 5s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 9s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 7s
Test Llama Stack Build / generate-matrix (push) Successful in 7s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 7s
Test Llama Stack Build / build-single-provider (push) Failing after 10s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Test External API and Providers / test-external (venv) (push) Failing after 10s
Test Llama Stack Build / build (push) Failing after 8s
Integration Tests / test-matrix (push) Failing after 9s
Unit Tests / unit-tests (3.13) (push) Failing after 11s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 29s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Failing after 26s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 39s
Python Package Build Test / build (3.13) (push) Failing after 38s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 41s
Pre-commit / pre-commit (push) Successful in 2m2s
Continuing with https://github.com/meta-llama/llama-stack/pull/2952 This also includes a "fix" to inference store related tests so that we pull a large number of inference responses from the DB so as to always find the one we just wrote.
This commit is contained in:
parent
81c7d6fa2e
commit
0ac503ec0d
13 changed files with 881 additions and 41 deletions
257
tests/integration/recordings/responses/7e6806cba34a.json
Normal file
257
tests/integration/recordings/responses/7e6806cba34a.json
Normal file
|
@ -0,0 +1,257 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://localhost:11434/api/generate",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"raw": true,
|
||||
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is 2 + 2?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
|
||||
"options": {
|
||||
"temperature": 0.0
|
||||
},
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/api/generate",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:42.546801Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "The",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:42.588481Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " answer",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:42.630066Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " to",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:42.671027Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " ",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:42.712294Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "2",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:42.753866Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " +",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:42.795863Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " ",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:42.83722Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "2",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:42.878343Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " is",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:42.919504Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": " ",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:42.960515Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": "4",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:43.001631Z",
|
||||
"done": false,
|
||||
"done_reason": null,
|
||||
"total_duration": null,
|
||||
"load_duration": null,
|
||||
"prompt_eval_count": null,
|
||||
"prompt_eval_duration": null,
|
||||
"eval_count": null,
|
||||
"eval_duration": null,
|
||||
"response": ".",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "ollama._types.GenerateResponse",
|
||||
"__data__": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"created_at": "2025-07-29T21:56:43.043353Z",
|
||||
"done": true,
|
||||
"done_reason": "stop",
|
||||
"total_duration": 834188250,
|
||||
"load_duration": 75182417,
|
||||
"prompt_eval_count": 29,
|
||||
"prompt_eval_duration": 261107458,
|
||||
"eval_count": 13,
|
||||
"eval_duration": 497358667,
|
||||
"response": "",
|
||||
"thinking": null,
|
||||
"context": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue