mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 18:00:36 +00:00)
fix(ci): add back server:ci-tests to replay tests (#3976)
Some checks failed
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 1s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 3s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 4s
Pre-commit / pre-commit (push) Failing after 4s
Python Package Build Test / build (3.13) (push) Failing after 5s
Test External API and Providers / test-external (venv) (push) Failing after 6s
Vector IO Integration Tests / test-matrix (push) Failing after 7s
Unit Tests / unit-tests (3.13) (push) Failing after 8s
API Conformance Tests / check-schema-compatibility (push) Successful in 15s
Python Package Build Test / build (3.12) (push) Failing after 39s
Unit Tests / unit-tests (3.12) (push) Failing after 40s
UI Tests / ui-tests (22) (push) Successful in 42s
This is useful for local debugging: if both the server and docker client types are failing in CI, you can run the server locally and debug there, which is much easier.
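As a rough sketch (not part of this commit), a local reproduction can mirror what the CI script below does; the `llama stack run ci-tests` launch command and the OTEL settings are taken from this diff, everything else is an assumption:

# Hedged sketch: reproduce the "server" client-type locally for debugging.
export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:4317"   # only needed if a local collector is running
export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
nohup llama stack run ci-tests > server.log 2>&1 &           # same launch command the CI script uses
tail -f server.log                                           # watch startup errors directly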
parent 5e20938832
commit 77c8bc6fa7
4 changed files with 21 additions and 19 deletions
.github/workflows/integration-tests.yml (vendored, 2 changes)
@@ -47,7 +47,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        client-type: [library, docker]
+        client-type: [library, docker, server]
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
@@ -208,6 +208,15 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
   echo "=== Starting Llama Stack Server ==="
   export LLAMA_STACK_LOG_WIDTH=120
 
+  # Configure telemetry collector for server mode
+  # Use a fixed port for the OTEL collector so the server can connect to it
+  COLLECTOR_PORT=4317
+  export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
+  export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
+  export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
+  export OTEL_BSP_SCHEDULE_DELAY="200"
+  export OTEL_BSP_EXPORT_TIMEOUT="2000"
+
   # remove "server:" from STACK_CONFIG
   stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
   nohup llama stack run $stack_config > server.log 2>&1 &
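When debugging this step locally, a quick sanity check (not part of this change; the port value is the one hard-coded above) is to confirm something is actually listening on the collector port before the server starts:

# Hedged sketch: verify the OTLP test collector is reachable on the fixed port.
COLLECTOR_PORT=4317
if nc -z 127.0.0.1 "${COLLECTOR_PORT}"; then
  echo "collector is listening on ${COLLECTOR_PORT}"
else
  echo "nothing is listening on ${COLLECTOR_PORT}; start the collector before the server" >&2
fi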
@@ -230,6 +230,7 @@ def instantiate_llama_stack_client(session):
 
     force_restart = os.environ.get("LLAMA_STACK_TEST_FORCE_SERVER_RESTART") == "1"
     if force_restart:
+        print(f"Forcing restart of the server on port {port}")
         stop_server_on_port(port)
 
     # Check if port is available
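For local iteration, the LLAMA_STACK_TEST_FORCE_SERVER_RESTART flag used here can be set before a run to drop a stale server on the port; a hedged example (the pytest target and selection are assumptions, not from this diff):

# Hedged sketch: force the fixture to restart any server already bound to the port.
export LLAMA_STACK_TEST_FORCE_SERVER_RESTART=1
pytest tests/integration -k telemetry   # hypothetical test selection; adjust as needed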
@@ -10,7 +10,6 @@ import os
 
 import pytest
 
-import llama_stack.core.telemetry.telemetry as telemetry_module
 from llama_stack.testing.api_recorder import patch_httpx_for_test_id
 from tests.integration.fixtures.common import instantiate_llama_stack_client
 from tests.integration.telemetry.collectors import InMemoryTelemetryManager, OtlpHttpTestCollector
@@ -21,33 +20,26 @@ def telemetry_test_collector():
     stack_mode = os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE", "library_client")
 
     if stack_mode == "server":
+        # In server mode, the collector must be started and the server is already running.
+        # The integration test script (scripts/integration-tests.sh) should have set
+        # LLAMA_STACK_TEST_COLLECTOR_PORT and OTEL_EXPORTER_OTLP_ENDPOINT before starting the server.
         try:
             collector = OtlpHttpTestCollector()
         except RuntimeError as exc:
             pytest.skip(str(exc))
-        env_overrides = {
-            "OTEL_EXPORTER_OTLP_ENDPOINT": collector.endpoint,
-            "OTEL_EXPORTER_OTLP_PROTOCOL": "http/protobuf",
-            "OTEL_BSP_SCHEDULE_DELAY": "200",
-            "OTEL_BSP_EXPORT_TIMEOUT": "2000",
-        }
 
-        previous_env = {key: os.environ.get(key) for key in env_overrides}
-        for key, value in env_overrides.items():
-            os.environ[key] = value
-
-        telemetry_module._TRACER_PROVIDER = None
+        # Verify the collector is listening on the expected endpoint
+        expected_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
+        if expected_endpoint and collector.endpoint != expected_endpoint:
+            pytest.skip(
+                f"Collector endpoint mismatch: expected {expected_endpoint}, got {collector.endpoint}. "
+                "Server was likely started before collector."
+            )
 
         try:
             yield collector
         finally:
             collector.shutdown()
-            for key, prior in previous_env.items():
-                if prior is None:
-                    os.environ.pop(key, None)
-                else:
-                    os.environ[key] = prior
     else:
         manager = InMemoryTelemetryManager()
         try: