From 77c8bc6fa7389d0e82495b203fa32e79c9eec6a7 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Date: Thu, 30 Oct 2025 11:02:59 -0700
Subject: [PATCH] fix(ci): add back server:ci-tests to replay tests (#3976)

It is useful for local debugging. If both server and docker are failing,
you can just run server locally to debug which is much easier to do.
---
 .github/workflows/integration-tests.yml |  2 +-
 scripts/integration-tests.sh            |  9 ++++++++
 tests/integration/fixtures/common.py    |  1 +
 tests/integration/telemetry/conftest.py | 28 +++++++++----------------
 4 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 2b8965aad..067f49abd 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -47,7 +47,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        client-type: [library, docker]
+        client-type: [library, docker, server]
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh
index a09dc8621..ed3934a5b 100755
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@@ -208,6 +208,15 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
         echo "=== Starting Llama Stack Server ==="
         export LLAMA_STACK_LOG_WIDTH=120
 
+        # Configure telemetry collector for server mode
+        # Use a fixed port for the OTEL collector so the server can connect to it
+        COLLECTOR_PORT=4317
+        export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
+        export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
+        export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
+        export OTEL_BSP_SCHEDULE_DELAY="200"
+        export OTEL_BSP_EXPORT_TIMEOUT="2000"
+
         # remove "server:" from STACK_CONFIG
         stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
         nohup llama stack run $stack_config > server.log 2>&1 &
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 41822f850..e68f9dc9e 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -230,6 +230,7 @@ def instantiate_llama_stack_client(session):
 
         force_restart = os.environ.get("LLAMA_STACK_TEST_FORCE_SERVER_RESTART") == "1"
         if force_restart:
+            print(f"Forcing restart of the server on port {port}")
             stop_server_on_port(port)
 
         # Check if port is available
diff --git a/tests/integration/telemetry/conftest.py b/tests/integration/telemetry/conftest.py
index 58ac4e0df..fd9224ae4 100644
--- a/tests/integration/telemetry/conftest.py
+++ b/tests/integration/telemetry/conftest.py
@@ -10,7 +10,6 @@ import os
 
 import pytest
 
-import llama_stack.core.telemetry.telemetry as telemetry_module
 from llama_stack.testing.api_recorder import patch_httpx_for_test_id
 from tests.integration.fixtures.common import instantiate_llama_stack_client
 from tests.integration.telemetry.collectors import InMemoryTelemetryManager, OtlpHttpTestCollector
@@ -21,33 +20,26 @@ def telemetry_test_collector():
     stack_mode = os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE", "library_client")
 
     if stack_mode == "server":
+        # In server mode, the collector must be started and the server is already running.
+        # The integration test script (scripts/integration-tests.sh) should have set
+        # LLAMA_STACK_TEST_COLLECTOR_PORT and OTEL_EXPORTER_OTLP_ENDPOINT before starting the server.
         try:
             collector = OtlpHttpTestCollector()
         except RuntimeError as exc:
             pytest.skip(str(exc))
-        env_overrides = {
-            "OTEL_EXPORTER_OTLP_ENDPOINT": collector.endpoint,
-            "OTEL_EXPORTER_OTLP_PROTOCOL": "http/protobuf",
-            "OTEL_BSP_SCHEDULE_DELAY": "200",
-            "OTEL_BSP_EXPORT_TIMEOUT": "2000",
-        }
 
-        previous_env = {key: os.environ.get(key) for key in env_overrides}
-
-        for key, value in env_overrides.items():
-            os.environ[key] = value
-
-        telemetry_module._TRACER_PROVIDER = None
+        # Verify the collector is listening on the expected endpoint
+        expected_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
+        if expected_endpoint and collector.endpoint != expected_endpoint:
+            pytest.skip(
+                f"Collector endpoint mismatch: expected {expected_endpoint}, got {collector.endpoint}. "
+                "Server was likely started before collector."
+            )
 
         try:
             yield collector
         finally:
             collector.shutdown()
-            for key, prior in previous_env.items():
-                if prior is None:
-                    os.environ.pop(key, None)
-                else:
-                    os.environ[key] = prior
     else:
         manager = InMemoryTelemetryManager()
         try: