mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-11 05:38:38 +00:00
feat(tests): make inference_recorder into api_recorder (include tool_invoke) (#3403)
Renames `inference_recorder.py` to `api_recorder.py` and extends it to support recording/replaying tool invocations in addition to inference calls. This allows us to record tool calls (web search, etc.) and then apply those recordings for `tests/integration/responses`.

## Test Plan

```
export OPENAI_API_KEY=...
export TAVILY_SEARCH_API_KEY=...
./scripts/integration-tests.sh --stack-config ci-tests \
  --suite responses --inference-mode record-if-missing
```
This commit is contained in:
parent
26fd5dbd34
commit
f50ce11a3b
284 changed files with 296191 additions and 631 deletions
|
@ -159,7 +159,6 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
|
|||
import threading
|
||||
import time
|
||||
|
||||
import httpx
|
||||
import uvicorn
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
from mcp.server.sse import SseServerTransport
|
||||
|
@ -171,6 +170,11 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
|
|||
|
||||
server = FastMCP("FastMCP Test Server", log_level="WARNING")
|
||||
|
||||
# Silence verbose MCP server logs
|
||||
import logging # allow-direct-logging
|
||||
|
||||
logging.getLogger("mcp.server.lowlevel.server").setLevel(logging.WARNING)
|
||||
|
||||
tools = tools or default_tools()
|
||||
|
||||
# Register all tools with the server
|
||||
|
@ -234,29 +238,25 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
|
|||
logger.debug(f"Starting MCP server thread on port {port}")
|
||||
server_thread.start()
|
||||
|
||||
# Polling until the server is ready
|
||||
timeout = 10
|
||||
# Wait for the server thread to be running
|
||||
# Note: We can't use a simple HTTP GET health check on /sse because it's an SSE endpoint
|
||||
# that expects a long-lived connection, not a simple request/response
|
||||
timeout = 2
|
||||
start_time = time.time()
|
||||
|
||||
server_url = f"http://localhost:{port}/sse"
|
||||
logger.debug(f"Waiting for MCP server to be ready at {server_url}")
|
||||
logger.debug(f"Waiting for MCP server thread to start on port {port}")
|
||||
|
||||
while time.time() - start_time < timeout:
|
||||
try:
|
||||
response = httpx.get(server_url)
|
||||
if response.status_code in [200, 401]:
|
||||
logger.debug(f"MCP server is ready on port {port} (status: {response.status_code})")
|
||||
break
|
||||
except httpx.RequestError as e:
|
||||
logger.debug(f"Server not ready yet, retrying... ({e})")
|
||||
pass
|
||||
time.sleep(0.1)
|
||||
if server_thread.is_alive():
|
||||
# Give the server a moment to bind to the port
|
||||
time.sleep(0.1)
|
||||
logger.debug(f"MCP server is ready on port {port}")
|
||||
break
|
||||
time.sleep(0.05)
|
||||
else:
|
||||
# If we exit the loop due to timeout
|
||||
logger.error(f"MCP server failed to start within {timeout} seconds on port {port}")
|
||||
logger.error(f"Thread alive: {server_thread.is_alive()}")
|
||||
if server_thread.is_alive():
|
||||
logger.error("Server thread is still running but not responding to HTTP requests")
|
||||
logger.error(f"MCP server thread failed to start within {timeout} seconds on port {port}")
|
||||
|
||||
try:
|
||||
yield {"server_url": server_url}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue