mirror of https://github.com/meta-llama/llama-stack.git
fix(telemetry): remove unnecessary calls to legacy tracing middleware
parent 503522716f
commit 9e2b92b8d2

4 changed files with 7 additions and 16 deletions
@@ -7,8 +7,6 @@
 from collections.abc import AsyncGenerator
 from contextvars import ContextVar
 
-from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT
-
 _MISSING = object()
 
 
@@ -69,16 +67,12 @@ def preserve_contexts_async_generator[T](
             try:
                 yield item
                 # Update our tracked values with any changes made during this iteration
-                # Only for non-trace context vars - trace context must persist across yields
-                # to allow nested span tracking for telemetry
+                # This allows context changes to persist across generator iterations
                 for context_var in context_vars:
-                    if context_var is not CURRENT_TRACE_CONTEXT:
-                        initial_context_values[context_var.name] = context_var.get()
+                    initial_context_values[context_var.name] = context_var.get()
             finally:
-                # Restore non-trace context vars after each yield to prevent leaks between requests
-                # CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
+                # Restore context vars after each yield to prevent leaks between requests
                 for context_var in context_vars:
-                    if context_var is not CURRENT_TRACE_CONTEXT:
-                        _restore_context_var(context_var)
+                    _restore_context_var(context_var)
 
     return wrapper()
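The hunk above simplifies preserve_contexts_async_generator: with the legacy tracing middleware gone, CURRENT_TRACE_CONTEXT no longer needs special-casing, so every tracked ContextVar is snapshotted after each yield and restored in the finally block. Below is a minimal, self-contained sketch of that pattern; preserve_contexts and REQUEST_ID are illustrative names only, and the real helper in llama-stack follows the same shape but is not identical.

# Illustrative sketch only: `preserve_contexts` and `REQUEST_ID` are made-up names.
import asyncio
from collections.abc import AsyncGenerator
from contextvars import ContextVar

REQUEST_ID: ContextVar[str | None] = ContextVar("request_id", default=None)


def preserve_contexts(
    gen: AsyncGenerator[str, None], context_vars: list[ContextVar]
) -> AsyncGenerator[str, None]:
    # Snapshot the values that should be re-applied on every iteration.
    tracked = {cv.name: cv.get() for cv in context_vars}

    async def wrapper() -> AsyncGenerator[str, None]:
        async for item in gen:
            for cv in context_vars:
                cv.set(tracked[cv.name])  # re-apply before handing the item out
            try:
                yield item
                # Pick up changes made during this iteration, uniformly for
                # every var -- no special case for a trace context anymore.
                for cv in context_vars:
                    tracked[cv.name] = cv.get()
            finally:
                # Restore after each yield so values do not leak between requests.
                for cv in context_vars:
                    cv.set(tracked[cv.name])

    return wrapper()


async def main() -> None:
    async def items() -> AsyncGenerator[str, None]:
        for i in range(3):
            yield f"chunk-{i}"

    REQUEST_ID.set("req-123")
    async for item in preserve_contexts(items(), [REQUEST_ID]):
        print(item, REQUEST_ID.get())


asyncio.run(main())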
@@ -8,7 +8,6 @@ from collections.abc import AsyncIterator, Iterable
 
 from openai import AuthenticationError
 
-from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
@@ -84,7 +83,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         """Override to enable streaming usage metrics and handle authentication errors."""
         # Enable streaming usage metrics when telemetry is active
-        if params.stream and get_current_span() is not None:
+        if params.stream:
             if params.stream_options is None:
                 params.stream_options = {"include_usage": True}
             elif "include_usage" not in params.stream_options:
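In the Bedrock adapter (and in the watsonx adapter and the LiteLLM mixin below), requesting a usage chunk is now tied only to params.stream rather than to an active telemetry span. Here is a hedged sketch of that branch, factored into a standalone helper so it is easy to exercise; ensure_usage_in_stream_options is an illustrative name, not a function in the repo, and the merge in the final case is an assumption since the hunk cuts off there.

# Illustrative helper: mirrors the branch above; the name and the merge in the
# last case are assumptions, not code from llama-stack.
def ensure_usage_in_stream_options(stream: bool, stream_options: dict | None) -> dict | None:
    """For streaming requests, ask the provider to emit a final usage chunk."""
    if not stream:
        return stream_options  # non-streaming calls are left untouched
    if stream_options is None:
        return {"include_usage": True}
    if "include_usage" not in stream_options:
        return {**stream_options, "include_usage": True}
    return stream_options


# The decision now depends only on `stream`; it previously also required
# get_current_span() is not None.
assert ensure_usage_in_stream_options(True, None) == {"include_usage": True}
assert ensure_usage_in_stream_options(True, {"chunk_size": 1}) == {"chunk_size": 1, "include_usage": True}
assert ensure_usage_in_stream_options(False, None) is None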
@@ -10,7 +10,6 @@ from typing import Any
 import litellm
 import requests
 
-from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
@@ -59,7 +58,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
 
         # Add usage tracking for streaming when telemetry is active
         stream_options = params.stream_options
-        if params.stream and get_current_span() is not None:
+        if params.stream:
             if stream_options is None:
                 stream_options = {"include_usage": True}
             elif "include_usage" not in stream_options:
@@ -217,10 +217,9 @@ class LiteLLMOpenAIMixin(
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         # Add usage tracking for streaming when telemetry is active
-        from llama_stack.core.telemetry.tracing import get_current_span
 
         stream_options = params.stream_options
-        if params.stream and get_current_span() is not None:
+        if params.stream:
             if stream_options is None:
                 stream_options = {"include_usage": True}
             elif "include_usage" not in stream_options:
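Net effect of the three adapter hunks: every streaming chat completion asks the upstream provider for usage reporting, whether or not telemetry is recording a span. A hedged consumer-side sketch follows; the base URL, model name, and API key are placeholders for a Llama Stack deployment, and it relies only on the OpenAI client behavior that, with include_usage set, the final chunk of a stream carries a populated usage field and an empty choices list.

# Placeholder endpoint/model/key -- adjust for your deployment; this only
# illustrates how the final usage chunk of a stream is consumed.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="not-needed")

stream = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
    stream_options={"include_usage": True},
)

for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
    if chunk.usage is not None:
        print(f"\nprompt_tokens={chunk.usage.prompt_tokens} completion_tokens={chunk.usage.completion_tokens}")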