fix: disable Gunicorn in telemetry tests to fix multi-process telemetry collection

Telemetry tests use an OTLP collector that expects single-process
telemetry spans. Gunicorn's multi-process architecture spawns multiple
workers, each with separate telemetry instrumentation, preventing the
test collector from capturing all spans.

This commit adds support for a LLAMA_STACK_DISABLE_GUNICORN environment
variable and sets it in the telemetry test configuration, so that the
tests run on single-process Uvicorn while production keeps its
multi-process Gunicorn behavior.

Fixes failing tests:
- test_streaming_chunk_count
- test_telemetry_format_completeness
This commit is contained in:
Roy Belio 2025-10-30 18:01:47 +02:00
parent b060f73e6d
commit 3e1d0060c1
2 changed files with 8 additions and 1 deletions

View file

@@ -169,11 +169,17 @@ class StackRun(Subcommand):
# Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
# signal handling but this is quite intrusive and not worth the effort.
try:
if sys.platform in ("linux", "darwin"):
# Check if Gunicorn should be disabled (for testing or debugging)
disable_gunicorn = os.getenv("LLAMA_STACK_DISABLE_GUNICORN", "false").lower() == "true"
if not disable_gunicorn and sys.platform in ("linux", "darwin"):
# On Unix-like systems, use Gunicorn with Uvicorn workers for production-grade performance
self._run_with_gunicorn(host, port, uvicorn_config)
else:
# On other systems (e.g., Windows), fall back to Uvicorn directly
# Also used when LLAMA_STACK_DISABLE_GUNICORN=true (for tests)
if disable_gunicorn:
logger.info("Gunicorn disabled via LLAMA_STACK_DISABLE_GUNICORN environment variable")
uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config) # type: ignore[arg-type]
except (KeyboardInterrupt, SystemExit):
logger.info("Received interrupt signal, shutting down gracefully...")

View file

@@ -30,6 +30,7 @@ def telemetry_test_collector():
"OTEL_EXPORTER_OTLP_PROTOCOL": "http/protobuf",
"OTEL_BSP_SCHEDULE_DELAY": "200",
"OTEL_BSP_EXPORT_TIMEOUT": "2000",
"LLAMA_STACK_DISABLE_GUNICORN": "true", # Disable multi-process for telemetry collection
}
previous_env = {key: os.environ.get(key) for key in env_overrides}