feat(tests): make inference_recorder into api_recorder (include tool_invoke) (#3403)

Renames `inference_recorder.py` to `api_recorder.py` and extends it to support recording/replaying tool invocations in addition to inference calls. This allows us to record tool calls (web search, etc.) and then use those recordings for `tests/integration/responses`.

## Test Plan

```
export OPENAI_API_KEY=...
export TAVILY_SEARCH_API_KEY=...
./scripts/integration-tests.sh --stack-config ci-tests \
  --suite responses --inference-mode record-if-missing
```
2025-12-04 02:03:44 +00:00 · 2025-10-09 14:27:51 -07:00 · 2025-10-09 14:27:51 -07:00 · f50ce11a3b
commit f50ce11a3b
parent 26fd5dbd34
284 changed files with 296191 additions and 631 deletions
--- a/llama_stack/core/stack.py
+++ b/llama_stack/core/stack.py
@@ -316,13 +316,13 @@ class Stack:
    # asked for in the run config.
    async def initialize(self):
        if "LLAMA_STACK_TEST_INFERENCE_MODE" in os.environ:
-            from llama_stack.testing.inference_recorder import setup_inference_recording
+            from llama_stack.testing.api_recorder import setup_api_recording

            global TEST_RECORDING_CONTEXT
-            TEST_RECORDING_CONTEXT = setup_inference_recording()
+            TEST_RECORDING_CONTEXT = setup_api_recording()
            if TEST_RECORDING_CONTEXT:
                TEST_RECORDING_CONTEXT.__enter__()
-                logger.info(f"Inference recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}")
+                logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}")

        dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name)
        policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else []
@@ -381,7 +381,7 @@ class Stack:
            try:
                TEST_RECORDING_CONTEXT.__exit__(None, None, None)
            except Exception as e:
-                logger.error(f"Error during inference recording cleanup: {e}")
+                logger.error(f"Error during API recording cleanup: {e}")

        global REGISTRY_REFRESH_TASK
        if REGISTRY_REFRESH_TASK: