feat(tests): make inference_recorder into api_recorder (include tool_invoke) (#3403)

Renames `inference_recorder.py` to `api_recorder.py` and extends it to
support recording/replaying tool invocations in addition to inference
calls.

This allows us to record tool calls (web search, etc.) and then replay
those recordings for `tests/integration/responses`.
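
For intuition, here is a minimal sketch of the record/replay idea applied to tool invocations: each call is keyed by a hash of the tool name and arguments, and the response is either served from disk or captured, depending on the mode. This is purely illustrative; the class, storage layout, and mode names below are assumptions, not the actual `api_recorder` implementation.

```python
import hashlib
import json
from pathlib import Path


class ToolCallRecorder:
    """Illustrative record/replay wrapper for tool invocations (not the real api_recorder)."""

    def __init__(self, storage_dir: str, mode: str = "replay"):
        self.dir = Path(storage_dir)
        self.dir.mkdir(parents=True, exist_ok=True)
        self.mode = mode  # assumed modes: "record", "replay", "record-if-missing"

    def _key(self, tool_name: str, kwargs: dict) -> Path:
        # Deterministic key from the tool name and its arguments.
        digest = hashlib.sha256(
            json.dumps({"tool": tool_name, "kwargs": kwargs}, sort_keys=True).encode()
        ).hexdigest()
        return self.dir / f"{digest}.json"

    async def invoke(self, tool_name: str, real_invoke, **kwargs):
        path = self._key(tool_name, kwargs)
        if self.mode != "record" and path.exists():
            # Replay a previously recorded response.
            return json.loads(path.read_text())
        if self.mode == "replay":
            raise RuntimeError(f"No recording for {tool_name} with {kwargs}")
        # Hit the real tool (e.g. web search) and persist the result.
        result = await real_invoke(**kwargs)
        path.write_text(json.dumps(result))
        return result
```

A record-if-missing mode like the one used in the test plan below would replay existing recordings and only hit the real tool for calls that have not been seen before.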

## Test Plan

```
export OPENAI_API_KEY=...
export TAVILY_SEARCH_API_KEY=...

./scripts/integration-tests.sh --stack-config ci-tests \
   --suite responses --inference-mode record-if-missing
```
Ashwin Bharambe, 2025-10-09 14:27:51 -07:00, committed by GitHub
parent 26fd5dbd34 · commit f50ce11a3b
284 changed files with 296191 additions and 631 deletions

```diff
@@ -232,14 +232,25 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
         await log_request_pre_validation(request)

+        test_context_token = None
         # Use context manager with both provider data and auth attributes
         with request_provider_data_context(request.headers, user):
+            if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE"):
+                from llama_stack.core.testing_context import (
+                    TEST_CONTEXT,
+                    reset_test_context,
+                    sync_test_context_from_provider_data,
+                )
+
+                test_context_token = sync_test_context_from_provider_data()
+
             is_streaming = is_streaming_request(func.__name__, request, **kwargs)
             try:
                 if is_streaming:
                     gen = preserve_contexts_async_generator(
-                        sse_generator(func(**kwargs)), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]
+                        sse_generator(func(**kwargs)), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR, TEST_CONTEXT]
                     )
                     return StreamingResponse(gen, media_type="text/event-stream")
                 else:
@@ -258,6 +269,9 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
                 else:
                     logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
                 raise translate_exception(e) from e
+            finally:
+                if test_context_token is not None:
+                    reset_test_context(test_context_token)

     sig = inspect.signature(func)
```
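
The server change above threads a per-request test context through both the blocking and the streaming paths. A rough sketch of what `llama_stack.core.testing_context` might look like follows; only the names `TEST_CONTEXT`, `sync_test_context_from_provider_data`, and `reset_test_context` appear in the diff, so everything else (the stored value, the provider-data key, the import path) is an assumption for illustration.

```python
# Hypothetical sketch of llama_stack/core/testing_context.py.
from contextvars import ContextVar, Token

from llama_stack.core.request_headers import PROVIDER_DATA_VAR  # assumed location

# Holds the identity of the test making the request, so recordings can be
# keyed/scoped per test.
TEST_CONTEXT: ContextVar[str | None] = ContextVar("test_context", default=None)


def sync_test_context_from_provider_data() -> Token | None:
    """Copy the test id forwarded by the client via provider-data headers into
    TEST_CONTEXT, returning a token so the caller can reset it afterwards."""
    provider_data = PROVIDER_DATA_VAR.get(None) or {}
    test_id = provider_data.get("__test_id__")  # assumed key name
    if test_id is None:
        return None
    return TEST_CONTEXT.set(test_id)


def reset_test_context(token: Token) -> None:
    """Restore TEST_CONTEXT to its previous value once the request finishes."""
    TEST_CONTEXT.reset(token)
```

Passing `TEST_CONTEXT` to `preserve_contexts_async_generator` matters because streaming responses keep yielding chunks after the request's `with` block has been exited; without it, the recorder would lose track of which test's recordings apply while the SSE generator is being consumed. The `finally` block then resets the context var so one request's test id cannot leak into the next.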