feat(tests): make inference_recorder into api_recorder (include tool_invoke) (#3403)

Renames `inference_recorder.py` to `api_recorder.py` and extends it to
support recording/replaying tool invocations in addition to inference
calls.

This allows us to record tool calls (web search, etc.) and then replay
those recordings when running `tests/integration/responses`.

## Test Plan

```
export OPENAI_API_KEY=...
export TAVILY_SEARCH_API_KEY=...

./scripts/integration-tests.sh --stack-config ci-tests \
   --suite responses --inference-mode record-if-missing
```
This commit is contained in:
Ashwin Bharambe 2025-10-09 14:27:51 -07:00 committed by GitHub
parent 26fd5dbd34
commit f50ce11a3b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
284 changed files with 296191 additions and 631 deletions

View file

@ -129,6 +129,8 @@ def client_with_models(
model_ids = {m.identifier for m in client.models.list()}
model_ids.update(m.provider_resource_id for m in client.models.list())
# TODO: stop arbitrarily registering against the first inference provider;
# that cannot be right. The test should probably just specify the provider_id.
if text_model_id and text_model_id not in model_ids:
client.models.register(model_id=text_model_id, provider_id=inference_providers[0])
if vision_model_id and vision_model_id not in model_ids:
@ -183,6 +185,12 @@ def llama_stack_client(request):
# would be forced to use llama_stack_client, which is not what we want.
print("\ninstantiating llama_stack_client")
start_time = time.time()
# Patch httpx to inject test ID for server-mode test isolation
from llama_stack.testing.api_recorder import patch_httpx_for_test_id
patch_httpx_for_test_id()
client = instantiate_llama_stack_client(request.session)
print(f"llama_stack_client instantiated in {time.time() - start_time:.3f}s")
return client