mirror of https://github.com/meta-llama/llama-stack.git
fix(telemetry): remove unnecessary calls to legacy tracing middleware
parent 503522716f
commit 9e2b92b8d2

4 changed files with 7 additions and 16 deletions
@@ -7,8 +7,6 @@
 from collections.abc import AsyncGenerator
 from contextvars import ContextVar
 
-from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT
-
 _MISSING = object()
 
 
@@ -69,16 +67,12 @@ def preserve_contexts_async_generator[T](
             try:
                 yield item
                 # Update our tracked values with any changes made during this iteration
-                # Only for non-trace context vars - trace context must persist across yields
-                # to allow nested span tracking for telemetry
+                # This allows context changes to persist across generator iterations
                 for context_var in context_vars:
-                    if context_var is not CURRENT_TRACE_CONTEXT:
-                        initial_context_values[context_var.name] = context_var.get()
+                    initial_context_values[context_var.name] = context_var.get()
             finally:
-                # Restore non-trace context vars after each yield to prevent leaks between requests
-                # CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
+                # Restore context vars after each yield to prevent leaks between requests
                 for context_var in context_vars:
-                    if context_var is not CURRENT_TRACE_CONTEXT:
-                        _restore_context_var(context_var)
+                    _restore_context_var(context_var)
 
     return wrapper()
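The hunk above simplifies preserve_contexts_async_generator: with the legacy tracing middleware gone, CURRENT_TRACE_CONTEXT no longer needs special-casing, so every tracked ContextVar is snapshotted after each yield and restored in the finally block. Below is a minimal, self-contained sketch of that pattern; preserve_contexts and REQUEST_ID are illustrative names only, and the real helper in llama-stack follows the same shape but is not identical.

# Illustrative sketch only: `preserve_contexts` and `REQUEST_ID` are made-up names.
import asyncio
from collections.abc import AsyncGenerator
from contextvars import ContextVar

REQUEST_ID: ContextVar[str | None] = ContextVar("request_id", default=None)


def preserve_contexts(
    gen: AsyncGenerator[str, None], context_vars: list[ContextVar]
) -> AsyncGenerator[str, None]:
    # Snapshot the values that should be re-applied on every iteration.
    tracked = {cv.name: cv.get() for cv in context_vars}

    async def wrapper() -> AsyncGenerator[str, None]:
        async for item in gen:
            for cv in context_vars:
                cv.set(tracked[cv.name])  # re-apply before handing the item out
            try:
                yield item
                # Pick up changes made during this iteration, uniformly for
                # every var -- no special case for a trace context anymore.
                for cv in context_vars:
                    tracked[cv.name] = cv.get()
            finally:
                # Restore after each yield so values do not leak between requests.
                for cv in context_vars:
                    cv.set(tracked[cv.name])

    return wrapper()


async def main() -> None:
    async def items() -> AsyncGenerator[str, None]:
        for i in range(3):
            yield f"chunk-{i}"

    REQUEST_ID.set("req-123")
    async for item in preserve_contexts(items(), [REQUEST_ID]):
        print(item, REQUEST_ID.get())


asyncio.run(main())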
@@ -8,7 +8,6 @@ from collections.abc import AsyncIterator, Iterable
 
 from openai import AuthenticationError
 
-from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
@@ -84,7 +83,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         """Override to enable streaming usage metrics and handle authentication errors."""
         # Enable streaming usage metrics when telemetry is active
-        if params.stream and get_current_span() is not None:
+        if params.stream:
             if params.stream_options is None:
                 params.stream_options = {"include_usage": True}
             elif "include_usage" not in params.stream_options:
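In the Bedrock adapter (and in the watsonx adapter and the LiteLLM mixin below), requesting a usage chunk is now tied only to params.stream rather than to an active telemetry span. Here is a hedged sketch of that branch, factored into a standalone helper so it is easy to exercise; ensure_usage_in_stream_options is an illustrative name, not a function in the repo, and the merge in the final case is an assumption since the hunk cuts off there.

# Illustrative helper: mirrors the branch above; the name and the merge in the
# last case are assumptions, not code from llama-stack.
def ensure_usage_in_stream_options(stream: bool, stream_options: dict | None) -> dict | None:
    """For streaming requests, ask the provider to emit a final usage chunk."""
    if not stream:
        return stream_options  # non-streaming calls are left untouched
    if stream_options is None:
        return {"include_usage": True}
    if "include_usage" not in stream_options:
        return {**stream_options, "include_usage": True}
    return stream_options


# The decision now depends only on `stream`; it previously also required
# get_current_span() is not None.
assert ensure_usage_in_stream_options(True, None) == {"include_usage": True}
assert ensure_usage_in_stream_options(True, {"chunk_size": 1}) == {"chunk_size": 1, "include_usage": True}
assert ensure_usage_in_stream_options(False, None) is None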
@@ -10,7 +10,6 @@ from typing import Any
 import litellm
 import requests
 
-from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
@@ -59,7 +58,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
 
         # Add usage tracking for streaming when telemetry is active
         stream_options = params.stream_options
-        if params.stream and get_current_span() is not None:
+        if params.stream:
             if stream_options is None:
                 stream_options = {"include_usage": True}
             elif "include_usage" not in stream_options:
@@ -217,10 +217,9 @@ class LiteLLMOpenAIMixin(
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         # Add usage tracking for streaming when telemetry is active
-        from llama_stack.core.telemetry.tracing import get_current_span
 
         stream_options = params.stream_options
-        if params.stream and get_current_span() is not None:
+        if params.stream:
             if stream_options is None:
                 stream_options = {"include_usage": True}
             elif "include_usage" not in stream_options:
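Net effect of the three adapter hunks: every streaming chat completion asks the upstream provider for usage reporting, whether or not telemetry is recording a span. A hedged consumer-side sketch follows; the base URL, model name, and API key are placeholders for a Llama Stack deployment, and it relies only on the OpenAI client behavior that, with include_usage set, the final chunk of a stream carries a populated usage field and an empty choices list.

# Placeholder endpoint/model/key -- adjust for your deployment; this only
# illustrates how the final usage chunk of a stream is consumed.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="not-needed")

stream = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
    stream_options={"include_usage": True},
)

for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
    if chunk.usage is not None:
        print(f"\nprompt_tokens={chunk.usage.prompt_tokens} completion_tokens={chunk.usage.completion_tokens}")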