From 68d8f2186f8530062dec5d1e1f0e71dbbf901c79 Mon Sep 17 00:00:00 2001 From: grs Date: Thu, 26 Jun 2025 16:41:35 +0100 Subject: [PATCH] fix: fix test of root span to match what is being set (#2494) # What does this PR do? I get errors when trying to query spans. It appears to be a result of traces being inserted where there is no root_span_id which causes a pydantic validation error on trying to load the data for a query response (and in any case having no span referenced undermines the purpose of the trace). The root cause as far as I can see is an invalid test in the code that inserts the trace, where it is testing for the string "true" against an object set to the python value True. Closes #2493 ## Test Plan With this change I can query spans. Signed-off-by: Gordon Sim --- .../telemetry/meta_reference/sqlite_span_processor.py | 4 +++- llama_stack/providers/utils/telemetry/tracing.py | 11 ++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/inline/telemetry/meta_reference/sqlite_span_processor.py b/llama_stack/providers/inline/telemetry/meta_reference/sqlite_span_processor.py index b329a363c..8ab491189 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/sqlite_span_processor.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/sqlite_span_processor.py @@ -14,6 +14,8 @@ from opentelemetry.sdk.trace import SpanProcessor from opentelemetry.trace import Span from opentelemetry.trace.span import format_span_id, format_trace_id +from llama_stack.providers.utils.telemetry.tracing import LOCAL_ROOT_SPAN_MARKER + class SQLiteSpanProcessor(SpanProcessor): def __init__(self, conn_string): @@ -124,7 +126,7 @@ class SQLiteSpanProcessor(SpanProcessor): ( trace_id, service_name, - (span_id if span.attributes.get("__root_span__") == "true" else None), + (span_id if span.attributes.get(LOCAL_ROOT_SPAN_MARKER) else None), datetime.fromtimestamp(span.start_time / 1e9, UTC).isoformat(), 
datetime.fromtimestamp(span.end_time / 1e9, UTC).isoformat(), ), diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 4ae68ee1d..c85722bdc 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -35,6 +35,9 @@ INVALID_SPAN_ID = 0x0000000000000000 INVALID_TRACE_ID = 0x00000000000000000000000000000000 ROOT_SPAN_MARKERS = ["__root__", "__root_span__"] +# The logical root span may not be visible to this process if a parent context +# is passed in. The local root span is the first local span in a trace. +LOCAL_ROOT_SPAN_MARKER = "__local_root_span__" def trace_id_to_str(trace_id: int) -> str: @@ -180,7 +183,13 @@ async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceCont trace_id = generate_trace_id() context = TraceContext(BACKGROUND_LOGGER, trace_id) - attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | (attributes or {}) + # Mark this span as the root for the trace for now. The processing of + # traceparent context if supplied comes later and will result in the + # ROOT_SPAN_MARKERS being removed. Also mark this as the 'local' root, + # i.e. the root of the spans originating in this process as this is + # needed to ensure that we insert this 'local' root span's id into + # the trace record in the sqlite store. + attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True} | (attributes or {}) context.push_span(name, attributes) CURRENT_TRACE_CONTEXT.set(context)