fix(telemetry): remove unnecessary calls to legacy tracing middleware

Emilio Garcia 2025-11-11 14:34:46 -05:00
parent 503522716f
commit 9e2b92b8d2
4 changed files with 7 additions and 16 deletions


@@ -7,8 +7,6 @@
 from collections.abc import AsyncGenerator
 from contextvars import ContextVar
-from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT
-
 _MISSING = object()
@@ -69,16 +67,12 @@ def preserve_contexts_async_generator[T](
             try:
                 yield item
                 # Update our tracked values with any changes made during this iteration
-                # Only for non-trace context vars - trace context must persist across yields
-                # to allow nested span tracking for telemetry
+                # This allows context changes to persist across generator iterations
                 for context_var in context_vars:
-                    if context_var is not CURRENT_TRACE_CONTEXT:
-                        initial_context_values[context_var.name] = context_var.get()
+                    initial_context_values[context_var.name] = context_var.get()
             finally:
-                # Restore non-trace context vars after each yield to prevent leaks between requests
-                # CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
+                # Restore context vars after each yield to prevent leaks between requests
                 for context_var in context_vars:
-                    if context_var is not CURRENT_TRACE_CONTEXT:
-                        _restore_context_var(context_var)
+                    _restore_context_var(context_var)
     return wrapper()
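With the trace-context special case gone, the wrapper treats every ContextVar the same way. A self-contained sketch of that pattern follows; the wrapper structure, the two-argument _restore_context_var helper, and the re-apply step before each resume are simplified assumptions for illustration, not the module's exact code:

import asyncio
from collections.abc import AsyncGenerator
from contextvars import ContextVar

_MISSING = object()
request_id: ContextVar[str] = ContextVar("request_id")


def _restore_context_var(var: ContextVar, value) -> None:
    # Simplified stand-in for the real helper: put the snapshotted value back.
    if value is not _MISSING:
        var.set(value)


def preserve_contexts(gen: AsyncGenerator, context_vars: list[ContextVar]) -> AsyncGenerator:
    # Snapshot the values that are current when the wrapper is created.
    initial = {var.name: var.get(_MISSING) for var in context_vars}

    async def wrapper() -> AsyncGenerator:
        while True:
            # Assumed from the surrounding code: re-apply the snapshot before
            # resuming the wrapped generator so it runs with the request's context.
            for var in context_vars:
                if initial[var.name] is not _MISSING:
                    var.set(initial[var.name])
            try:
                item = await gen.__anext__()
            except StopAsyncIteration:
                return
            try:
                yield item
                # Update tracked values with any changes made during this iteration,
                # so context changes persist across generator iterations.
                for var in context_vars:
                    initial[var.name] = var.get(_MISSING)
            finally:
                # Restore context vars after each yield to prevent leaks between requests.
                for var in context_vars:
                    _restore_context_var(var, initial[var.name])

    return wrapper()


async def demo() -> None:
    request_id.set("req-1")

    async def chunks() -> AsyncGenerator[int, None]:
        for i in range(3):
            yield i

    async for n in preserve_contexts(chunks(), [request_id]):
        print(n, request_id.get())
        if n == 0:
            request_id.set("req-2")  # picked up by the snapshot on the next resume


asyncio.run(demo())
# prints: 0 req-1 / 1 req-2 / 2 req-2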


@@ -8,7 +8,6 @@ from collections.abc import AsyncIterator, Iterable
 from openai import AuthenticationError
-from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
@@ -84,7 +83,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         """Override to enable streaming usage metrics and handle authentication errors."""
         # Enable streaming usage metrics when telemetry is active
-        if params.stream and get_current_span() is not None:
+        if params.stream:
             if params.stream_options is None:
                 params.stream_options = {"include_usage": True}
             elif "include_usage" not in params.stream_options:


@@ -10,7 +10,6 @@ from typing import Any
 import litellm
 import requests
-from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
@@ -59,7 +58,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         # Add usage tracking for streaming when telemetry is active
         stream_options = params.stream_options
-        if params.stream and get_current_span() is not None:
+        if params.stream:
             if stream_options is None:
                 stream_options = {"include_usage": True}
             elif "include_usage" not in stream_options:


@@ -217,10 +217,9 @@ class LiteLLMOpenAIMixin(
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         # Add usage tracking for streaming when telemetry is active
-        from llama_stack.core.telemetry.tracing import get_current_span
         stream_options = params.stream_options
-        if params.stream and get_current_span() is not None:
+        if params.stream:
             if stream_options is None:
                 stream_options = {"include_usage": True}
             elif "include_usage" not in stream_options: