fix(ci): add back server:ci-tests to replay tests (#3976)

It is useful for local debugging. If both the server and docker runs are failing,
you can just run the server locally to debug, which is much easier to do.
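
A rough sketch of what that local debug loop can look like (the environment variable names come from the fixtures touched in this diff; the programmatic pytest invocation and test path are illustrative assumptions, not the project's documented entry point):

import os
import pytest

# Variables read by the test fixtures changed in this commit (see the hunks below).
os.environ["LLAMA_STACK_TEST_STACK_CONFIG_TYPE"] = "server"   # run against a real local server
os.environ["LLAMA_STACK_TEST_FORCE_SERVER_RESTART"] = "1"     # stop any stale server before starting

# Hypothetical target: point pytest at whichever replay test you are debugging.
pytest.main(["tests/integration", "-k", "replay", "-x"])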
Ashwin Bharambe 2025-10-30 11:02:59 -07:00 committed by GitHub
parent 5e20938832
commit 77c8bc6fa7
4 changed files with 21 additions and 19 deletions


@@ -230,6 +230,7 @@ def instantiate_llama_stack_client(session):
force_restart = os.environ.get("LLAMA_STACK_TEST_FORCE_SERVER_RESTART") == "1"
if force_restart:
    print(f"Forcing restart of the server on port {port}")
    stop_server_on_port(port)
# Check if port is available
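
The helpers referenced in this hunk (stop_server_on_port and the port-availability check) live in the test utilities and are not shown in the diff. A minimal sketch of what they might look like, assuming lsof is available and the server listens on localhost:

import socket
import subprocess

def stop_server_on_port(port: int) -> None:
    # Best-effort: find whatever is listening on the port and terminate it
    # so the fixture can start a fresh server (assumes `lsof` is installed).
    result = subprocess.run(["lsof", "-ti", f"tcp:{port}"], capture_output=True, text=True)
    for pid in result.stdout.split():
        subprocess.run(["kill", pid])

def port_is_available(port: int) -> bool:
    # A probe like this backs the "Check if port is available" step above.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        return sock.connect_ex(("127.0.0.1", port)) != 0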


@ -10,7 +10,6 @@ import os
import pytest
import llama_stack.core.telemetry.telemetry as telemetry_module
from llama_stack.testing.api_recorder import patch_httpx_for_test_id
from tests.integration.fixtures.common import instantiate_llama_stack_client
from tests.integration.telemetry.collectors import InMemoryTelemetryManager, OtlpHttpTestCollector
@@ -21,33 +20,26 @@ def telemetry_test_collector():
stack_mode = os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE", "library_client")
if stack_mode == "server":
    # In server mode, the collector must be started and the server is already running.
    # The integration test script (scripts/integration-tests.sh) should have set
    # LLAMA_STACK_TEST_COLLECTOR_PORT and OTEL_EXPORTER_OTLP_ENDPOINT before starting the server.
    try:
        collector = OtlpHttpTestCollector()
    except RuntimeError as exc:
        pytest.skip(str(exc))
    env_overrides = {
        "OTEL_EXPORTER_OTLP_ENDPOINT": collector.endpoint,
        "OTEL_EXPORTER_OTLP_PROTOCOL": "http/protobuf",
        "OTEL_BSP_SCHEDULE_DELAY": "200",
        "OTEL_BSP_EXPORT_TIMEOUT": "2000",
    }
    previous_env = {key: os.environ.get(key) for key in env_overrides}
    for key, value in env_overrides.items():
        os.environ[key] = value
    telemetry_module._TRACER_PROVIDER = None
    # Verify the collector is listening on the expected endpoint
    expected_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
    if expected_endpoint and collector.endpoint != expected_endpoint:
        pytest.skip(
            f"Collector endpoint mismatch: expected {expected_endpoint}, got {collector.endpoint}. "
            "Server was likely started before collector."
        )
    try:
        yield collector
    finally:
        collector.shutdown()
        for key, prior in previous_env.items():
            if prior is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = prior
else:
    manager = InMemoryTelemetryManager()
    try: