mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-11 19:56:03 +00:00
fix(telemetry): configure OTEL collector before server starts in integration tests
The telemetry integration tests were failing in server mode because the OTEL collector configuration was set up after the server had already started. As a result, the server never received the telemetry configuration and could not export spans to the collector, so the span collections came back empty.

Changes:
- Set the OTEL environment variables in integration-tests.sh before starting the server
- Use LLAMA_STACK_TEST_COLLECTOR_PORT to ensure the collector and the server use the same port
- Simplify conftest.py so that it no longer overrides env vars in server mode, since the server is already running
- Add verification that the collector endpoint matches the expected endpoint

This ensures telemetry spans are properly collected during server-mode tests.
This commit is contained in:
parent
a68079feb5
commit
a371475cc8
2 changed files with 19 additions and 17 deletions
|
|
@ -208,6 +208,15 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
|
||||||
echo "=== Starting Llama Stack Server ==="
|
echo "=== Starting Llama Stack Server ==="
|
||||||
export LLAMA_STACK_LOG_WIDTH=120
|
export LLAMA_STACK_LOG_WIDTH=120
|
||||||
|
|
||||||
|
# Configure telemetry collector for server mode
|
||||||
|
# Use a fixed port for the OTEL collector so the server can connect to it
|
||||||
|
COLLECTOR_PORT=4317
|
||||||
|
export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
|
||||||
|
export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
|
||||||
|
export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
|
||||||
|
export OTEL_BSP_SCHEDULE_DELAY="200"
|
||||||
|
export OTEL_BSP_EXPORT_TIMEOUT="2000"
|
||||||
|
|
||||||
# remove "server:" from STACK_CONFIG
|
# remove "server:" from STACK_CONFIG
|
||||||
stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
|
stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
|
||||||
nohup llama stack run $stack_config > server.log 2>&1 &
|
nohup llama stack run $stack_config > server.log 2>&1 &
|
||||||
|
|
|
||||||
|
|
@ -21,33 +21,26 @@ def telemetry_test_collector():
|
||||||
stack_mode = os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE", "library_client")
|
stack_mode = os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE", "library_client")
|
||||||
|
|
||||||
if stack_mode == "server":
|
if stack_mode == "server":
|
||||||
|
# In server mode, the collector must be started and the server is already running.
|
||||||
|
# The integration test script (scripts/integration-tests.sh) should have set
|
||||||
|
# LLAMA_STACK_TEST_COLLECTOR_PORT and OTEL_EXPORTER_OTLP_ENDPOINT before starting the server.
|
||||||
try:
|
try:
|
||||||
collector = OtlpHttpTestCollector()
|
collector = OtlpHttpTestCollector()
|
||||||
except RuntimeError as exc:
|
except RuntimeError as exc:
|
||||||
pytest.skip(str(exc))
|
pytest.skip(str(exc))
|
||||||
env_overrides = {
|
|
||||||
"OTEL_EXPORTER_OTLP_ENDPOINT": collector.endpoint,
|
|
||||||
"OTEL_EXPORTER_OTLP_PROTOCOL": "http/protobuf",
|
|
||||||
"OTEL_BSP_SCHEDULE_DELAY": "200",
|
|
||||||
"OTEL_BSP_EXPORT_TIMEOUT": "2000",
|
|
||||||
}
|
|
||||||
|
|
||||||
previous_env = {key: os.environ.get(key) for key in env_overrides}
|
# Verify the collector is listening on the expected endpoint
|
||||||
|
expected_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
|
||||||
for key, value in env_overrides.items():
|
if expected_endpoint and collector.endpoint != expected_endpoint:
|
||||||
os.environ[key] = value
|
pytest.skip(
|
||||||
|
f"Collector endpoint mismatch: expected {expected_endpoint}, got {collector.endpoint}. "
|
||||||
telemetry_module._TRACER_PROVIDER = None
|
"Server was likely started before collector."
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
yield collector
|
yield collector
|
||||||
finally:
|
finally:
|
||||||
collector.shutdown()
|
collector.shutdown()
|
||||||
for key, prior in previous_env.items():
|
|
||||||
if prior is None:
|
|
||||||
os.environ.pop(key, None)
|
|
||||||
else:
|
|
||||||
os.environ[key] = prior
|
|
||||||
else:
|
else:
|
||||||
manager = InMemoryTelemetryManager()
|
manager = InMemoryTelemetryManager()
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue