mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-17 11:02:36 +00:00
new types and working
This commit is contained in:
parent
b7c4997e91
commit
94b4113c63
6 changed files with 252 additions and 157 deletions
141
llama_stack/providers/utils/telemetry/jaeger.py
Normal file
141
llama_stack/providers/utils/telemetry/jaeger.py
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List
|
||||
|
||||
import aiohttp
|
||||
|
||||
from llama_stack.apis.telemetry import Span, SpanNode, Trace, TraceStore, TraceTree
|
||||
|
||||
|
||||
class JaegerTraceStore(TraceStore):
|
||||
def __init__(self, endpoint: str, service_name: str):
|
||||
self.endpoint = endpoint
|
||||
self.service_name = service_name
|
||||
|
||||
async def get_trace(self, trace_id: str) -> TraceTree:
|
||||
params = {
|
||||
"traceID": trace_id,
|
||||
}
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(
|
||||
f"{self.endpoint}/{trace_id}", params=params
|
||||
) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(
|
||||
f"Failed to query Jaeger: {response.status} {await response.text()}"
|
||||
)
|
||||
|
||||
trace_data = await response.json()
|
||||
if not trace_data.get("data") or not trace_data["data"]:
|
||||
return None
|
||||
|
||||
# First pass: Build span map
|
||||
span_map = {}
|
||||
for jaeger_span in trace_data["data"][0]["spans"]:
|
||||
start_time = datetime.fromtimestamp(
|
||||
jaeger_span["startTime"] / 1000000
|
||||
)
|
||||
|
||||
# Some systems store end time directly in the span
|
||||
if "endTime" in jaeger_span:
|
||||
end_time = datetime.fromtimestamp(
|
||||
jaeger_span["endTime"] / 1000000
|
||||
)
|
||||
else:
|
||||
duration_microseconds = jaeger_span.get("duration", 0)
|
||||
duration_timedelta = timedelta(
|
||||
microseconds=duration_microseconds
|
||||
)
|
||||
end_time = start_time + duration_timedelta
|
||||
|
||||
span = Span(
|
||||
span_id=jaeger_span["spanID"],
|
||||
trace_id=trace_id,
|
||||
name=jaeger_span["operationName"],
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
parent_span_id=next(
|
||||
(
|
||||
ref["spanID"]
|
||||
for ref in jaeger_span.get("references", [])
|
||||
if ref["refType"] == "CHILD_OF"
|
||||
),
|
||||
None,
|
||||
),
|
||||
attributes={
|
||||
tag["key"]: tag["value"]
|
||||
for tag in jaeger_span.get("tags", [])
|
||||
},
|
||||
)
|
||||
|
||||
span_map[span.span_id] = SpanNode(span=span)
|
||||
|
||||
# Second pass: Build parent-child relationships
|
||||
root_node = None
|
||||
for span_node in span_map.values():
|
||||
parent_id = span_node.span.parent_span_id
|
||||
if parent_id and parent_id in span_map:
|
||||
span_map[parent_id].children.append(span_node)
|
||||
elif not parent_id:
|
||||
root_node = span_node
|
||||
|
||||
trace = Trace(
|
||||
trace_id=trace_id,
|
||||
root_span_id=root_node.span.span_id if root_node else "",
|
||||
start_time=(
|
||||
root_node.span.start_time if root_node else datetime.now()
|
||||
),
|
||||
end_time=root_node.span.end_time if root_node else None,
|
||||
)
|
||||
|
||||
return TraceTree(trace=trace, root=root_node)
|
||||
|
||||
except Exception as e:
|
||||
raise Exception(f"Error querying Jaeger trace structure: {str(e)}") from e
|
||||
|
||||
async def get_traces_for_sessions(self, session_ids: List[str]) -> List[Trace]:
|
||||
traces = []
|
||||
|
||||
# Fetch traces for each session ID individually
|
||||
for session_id in session_ids:
|
||||
params = {
|
||||
"service": self.service_name,
|
||||
"tags": f'{{"session_id":"{session_id}"}}',
|
||||
"limit": 100,
|
||||
"lookback": "10000h",
|
||||
}
|
||||
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(self.endpoint, params=params) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(
|
||||
f"Failed to query Jaeger: {response.status} {await response.text()}"
|
||||
)
|
||||
|
||||
traces_data = await response.json()
|
||||
seen_trace_ids = set()
|
||||
|
||||
for trace_data in traces_data.get("data", []):
|
||||
trace_id = trace_data.get("traceID")
|
||||
if trace_id and trace_id not in seen_trace_ids:
|
||||
seen_trace_ids.add(trace_id)
|
||||
traces.append(
|
||||
Trace(
|
||||
trace_id=trace_id,
|
||||
root_span_id="",
|
||||
start_time=datetime.now(),
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise Exception(f"Error querying Jaeger traces: {str(e)}") from e
|
||||
|
||||
return traces
|
||||
Loading…
Add table
Add a link
Reference in a new issue