feat(tests): make inference_recorder into api_recorder (include tool_invoke) (#3403)

Renames `inference_recorder.py` to `api_recorder.py` and extends it to
support recording/replaying tool invocations in addition to inference
calls.

This allows us to record web-search, etc. tool calls and thereafter
apply recordings for `tests/integration/responses`

## Test Plan

```
export OPENAI_API_KEY=...
export TAVILY_SEARCH_API_KEY=...

./scripts/integration-tests.sh --stack-config ci-tests \
   --suite responses --inference-mode record-if-missing
```
This commit is contained in:
Ashwin Bharambe 2025-10-09 14:27:51 -07:00 committed by GitHub
parent 26fd5dbd34
commit f50ce11a3b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
284 changed files with 296191 additions and 631 deletions

View file

@ -41,32 +41,33 @@ basic_test_cases = [
),
id="saturn",
),
pytest.param(
ResponsesTestCase(
input=[
{
"role": "user",
"content": [
{
"type": "input_text",
"text": "what teams are playing in this image?",
}
],
},
{
"role": "user",
"content": [
{
"type": "input_image",
"image_url": "https://upload.wikimedia.org/wikipedia/commons/3/3b/LeBron_James_Layup_%28Cleveland_vs_Brooklyn_2018%29.jpg",
}
],
},
],
expected="brooklyn nets",
),
id="image_input",
),
# TODO: Add image input test case, since this test case got Nerfed by OpenAI with a refusal
# pytest.param(
# ResponsesTestCase(
# input=[
# {
# "role": "user",
# "content": [
# {
# "type": "input_text",
# "text": "what teams are playing in this image?",
# }
# ],
# },
# {
# "role": "user",
# "content": [
# {
# "type": "input_image",
# "image_url": "https://upload.wikimedia.org/wikipedia/commons/3/3b/LeBron_James_Layup_%28Cleveland_vs_Brooklyn_2018%29.jpg",
# }
# ],
# },
# ],
# expected="brooklyn nets",
# ),
# id="image_input",
# ),
]
# Multi-turn test cases