From 77c8bc6fa7389d0e82495b203fa32e79c9eec6a7 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 30 Oct 2025 11:02:59 -0700 Subject: [PATCH] fix(ci): add back server:ci-tests to replay tests (#3976) It is useful for local debugging. If both server and docker are failing, you can just run the server locally to debug, which is much easier to do. --- .github/workflows/integration-tests.yml | 2 +- scripts/integration-tests.sh | 9 ++++++++ tests/integration/fixtures/common.py | 1 + tests/integration/telemetry/conftest.py | 28 +++++++++---------------- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 2b8965aad..067f49abd 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -47,7 +47,7 @@ jobs: strategy: fail-fast: false matrix: - client-type: [library, docker] + client-type: [library, docker, server] # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12 python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }} client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }} diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index a09dc8621..ed3934a5b 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -208,6 +208,15 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then echo "=== Starting Llama Stack Server ===" export LLAMA_STACK_LOG_WIDTH=120 + # Configure telemetry collector for server mode + # Use a fixed port for the OTEL collector so the server can connect to it + COLLECTOR_PORT=4317 + export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}" + export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}" + export 
OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf" + export OTEL_BSP_SCHEDULE_DELAY="200" + export OTEL_BSP_EXPORT_TIMEOUT="2000" + # remove "server:" from STACK_CONFIG stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://') nohup llama stack run $stack_config > server.log 2>&1 & diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 41822f850..e68f9dc9e 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -230,6 +230,7 @@ def instantiate_llama_stack_client(session): force_restart = os.environ.get("LLAMA_STACK_TEST_FORCE_SERVER_RESTART") == "1" if force_restart: + print(f"Forcing restart of the server on port {port}") stop_server_on_port(port) # Check if port is available diff --git a/tests/integration/telemetry/conftest.py b/tests/integration/telemetry/conftest.py index 58ac4e0df..fd9224ae4 100644 --- a/tests/integration/telemetry/conftest.py +++ b/tests/integration/telemetry/conftest.py @@ -10,7 +10,6 @@ import os import pytest -import llama_stack.core.telemetry.telemetry as telemetry_module from llama_stack.testing.api_recorder import patch_httpx_for_test_id from tests.integration.fixtures.common import instantiate_llama_stack_client from tests.integration.telemetry.collectors import InMemoryTelemetryManager, OtlpHttpTestCollector @@ -21,33 +20,26 @@ def telemetry_test_collector(): stack_mode = os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE", "library_client") if stack_mode == "server": + # In server mode the server is already running, so the collector must be started here. + # The integration test script (scripts/integration-tests.sh) should have set + # LLAMA_STACK_TEST_COLLECTOR_PORT and OTEL_EXPORTER_OTLP_ENDPOINT before starting the server. 
try: collector = OtlpHttpTestCollector() except RuntimeError as exc: pytest.skip(str(exc)) - env_overrides = { - "OTEL_EXPORTER_OTLP_ENDPOINT": collector.endpoint, - "OTEL_EXPORTER_OTLP_PROTOCOL": "http/protobuf", - "OTEL_BSP_SCHEDULE_DELAY": "200", - "OTEL_BSP_EXPORT_TIMEOUT": "2000", - } - previous_env = {key: os.environ.get(key) for key in env_overrides} - - for key, value in env_overrides.items(): - os.environ[key] = value - - telemetry_module._TRACER_PROVIDER = None + # Verify the collector is listening on the expected endpoint + expected_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT") + if expected_endpoint and collector.endpoint != expected_endpoint: + pytest.skip( + f"Collector endpoint mismatch: expected {expected_endpoint}, got {collector.endpoint}. " + "Server was likely started before collector." + ) try: yield collector finally: collector.shutdown() - for key, prior in previous_env.items(): - if prior is None: - os.environ.pop(key, None) - else: - os.environ[key] = prior else: manager = InMemoryTelemetryManager() try: