From 8b46d5966b9bc9d29cf8cac01ddb137107e85a26 Mon Sep 17 00:00:00 2001
From: Emilio Garcia
Date: Tue, 11 Nov 2025 14:34:46 -0500
Subject: [PATCH] fix(telemetry): remove unnecessary calls to legacy tracing middleware

---
 src/llama_stack/core/utils/context.py              | 14 ++++----------
 .../providers/remote/inference/bedrock/bedrock.py  |  3 +--
 .../providers/remote/inference/watsonx/watsonx.py  |  3 +--
 .../utils/inference/litellm_openai_mixin.py        |  3 +--
 4 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/src/llama_stack/core/utils/context.py b/src/llama_stack/core/utils/context.py
index e7c61a8ed..0c3e41f00 100644
--- a/src/llama_stack/core/utils/context.py
+++ b/src/llama_stack/core/utils/context.py
@@ -7,8 +7,6 @@
 from collections.abc import AsyncGenerator
 from contextvars import ContextVar
 
-from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT
-
 _MISSING = object()
 
 
@@ -69,16 +67,12 @@ def preserve_contexts_async_generator[T](
             try:
                 yield item
                 # Update our tracked values with any changes made during this iteration
-                # Only for non-trace context vars - trace context must persist across yields
-                # to allow nested span tracking for telemetry
+                # This allows context changes to persist across generator iterations
                 for context_var in context_vars:
-                    if context_var is not CURRENT_TRACE_CONTEXT:
-                        initial_context_values[context_var.name] = context_var.get()
+                    initial_context_values[context_var.name] = context_var.get()
             finally:
-                # Restore non-trace context vars after each yield to prevent leaks between requests
-                # CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
+                # Restore context vars after each yield to prevent leaks between requests
                 for context_var in context_vars:
-                    if context_var is not CURRENT_TRACE_CONTEXT:
-                        _restore_context_var(context_var)
+                    _restore_context_var(context_var)
 
     return wrapper()
diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 70ee95916..17d1aca77 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -8,7 +8,6 @@
 from collections.abc import AsyncIterator, Iterable
 
 from openai import AuthenticationError
-from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
@@ -84,7 +83,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         """Override to enable streaming usage metrics and handle authentication errors."""
         # Enable streaming usage metrics when telemetry is active
-        if params.stream and get_current_span() is not None:
+        if params.stream:
             if params.stream_options is None:
                 params.stream_options = {"include_usage": True}
             elif "include_usage" not in params.stream_options:
diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
index aab9e2dca..8d204d72a 100644
--- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -10,7 +10,6 @@
 from typing import Any
 import litellm
 import requests
-from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
@@ -59,7 +58,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
 
         # Add usage tracking for streaming when telemetry is active
         stream_options = params.stream_options
-        if params.stream and get_current_span() is not None:
+        if params.stream:
             if stream_options is None:
                 stream_options = {"include_usage": True}
             elif "include_usage" not in stream_options:
diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
index c462d1aad..47c68ff0a 100644
--- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
+++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -217,10 +217,9 @@ class LiteLLMOpenAIMixin(
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         # Add usage tracking for streaming when telemetry is active
-        from llama_stack.core.telemetry.tracing import get_current_span
 
         stream_options = params.stream_options
-        if params.stream and get_current_span() is not None:
+        if params.stream:
             if stream_options is None:
                 stream_options = {"include_usage": True}
             elif "include_usage" not in stream_options:
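
Note (illustrative, not part of the patch): after this change, preserve_contexts_async_generator treats every context var uniformly: each one is tracked after a yield and restored in the finally block, with no special case for a trace-context var. The standalone sketch below mimics that pattern under assumed names (preserve_contexts, demo_var); it is not the llama_stack implementation.

    import asyncio
    from collections.abc import AsyncGenerator
    from contextvars import ContextVar

    # Hypothetical context var used only for this demo.
    demo_var: ContextVar[str] = ContextVar("demo_var", default="unset")


    def preserve_contexts[T](
        gen: AsyncGenerator[T, None], context_vars: list[ContextVar]
    ) -> AsyncGenerator[T, None]:
        """Sketch of the uniform track-and-restore pattern, not the real helper."""
        # Snapshot the values that were current when the wrapper was created.
        tracked = {cv.name: cv.get() for cv in context_vars}

        async def wrapper() -> AsyncGenerator[T, None]:
            async for item in gen:
                # Re-apply tracked values before handing control to the consumer.
                for cv in context_vars:
                    cv.set(tracked[cv.name])
                try:
                    yield item
                    # Pick up changes made while the consumer held control so they
                    # persist into the next iteration (no trace-var carve-out).
                    for cv in context_vars:
                        tracked[cv.name] = cv.get()
                finally:
                    # Restore every var after each yield to avoid leaking state.
                    for cv in context_vars:
                        cv.set(tracked[cv.name])

        return wrapper()


    async def main() -> None:
        async def numbers() -> AsyncGenerator[int, None]:
            for i in range(3):
                yield i

        async for n in preserve_contexts(numbers(), [demo_var]):
            demo_var.set(f"seen-{n}")  # the change survives into the next iteration
        print(demo_var.get())  # prints "seen-2"


    if __name__ == "__main__":
        asyncio.run(main())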
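
The three inference adapters touched here share the same stream_options handling; dropping the get_current_span() gate means usage reporting is requested whenever streaming is enabled, unless the caller already chose a value. A small standalone sketch of that logic follows; the helper name is hypothetical and not part of the codebase.

    def ensure_usage_in_stream_options(stream: bool, stream_options: dict | None) -> dict | None:
        """Hypothetical helper mirroring the include_usage logic in the patched adapters."""
        if not stream:
            # Non-streaming completions typically carry usage in the response body already.
            return stream_options
        if stream_options is None:
            return {"include_usage": True}
        if "include_usage" not in stream_options:
            return {**stream_options, "include_usage": True}
        # An explicit caller choice (True or False) is left untouched.
        return stream_options


    assert ensure_usage_in_stream_options(True, None) == {"include_usage": True}
    assert ensure_usage_in_stream_options(True, {"include_usage": False}) == {"include_usage": False}
    assert ensure_usage_in_stream_options(False, None) is None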