consolidate telemetry to meta reference inline

Dinesh Yeduguru 2024-12-03 16:25:20 -08:00
parent cb49d21a49
commit 5d0c502cdb
21 changed files with 667 additions and 722 deletions

@@ -1,141 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from datetime import datetime, timedelta
from typing import List, Optional
import aiohttp
from llama_stack.apis.telemetry import Span, SpanNode, Trace, TraceStore, TraceTree
class JaegerTraceStore(TraceStore):
def __init__(self, endpoint: str, service_name: str):
self.endpoint = endpoint
self.service_name = service_name
async def get_trace(self, trace_id: str) -> Optional[TraceTree]:
params = {
"traceID": trace_id,
}
try:
async with aiohttp.ClientSession() as session:
async with session.get(
f"{self.endpoint}/{trace_id}", params=params
) as response:
if response.status != 200:
raise Exception(
f"Failed to query Jaeger: {response.status} {await response.text()}"
)
trace_data = await response.json()
if not trace_data.get("data") or not trace_data["data"]:
return None
# First pass: Build span map
span_map = {}
for jaeger_span in trace_data["data"][0]["spans"]:
start_time = datetime.fromtimestamp(
jaeger_span["startTime"] / 1000000
)
# Some systems store end time directly in the span
if "endTime" in jaeger_span:
end_time = datetime.fromtimestamp(
jaeger_span["endTime"] / 1000000
)
else:
duration_microseconds = jaeger_span.get("duration", 0)
duration_timedelta = timedelta(
microseconds=duration_microseconds
)
end_time = start_time + duration_timedelta
span = Span(
span_id=jaeger_span["spanID"],
trace_id=trace_id,
name=jaeger_span["operationName"],
start_time=start_time,
end_time=end_time,
parent_span_id=next(
(
ref["spanID"]
for ref in jaeger_span.get("references", [])
if ref["refType"] == "CHILD_OF"
),
None,
),
attributes={
tag["key"]: tag["value"]
for tag in jaeger_span.get("tags", [])
},
)
span_map[span.span_id] = SpanNode(span=span)
# Second pass: Build parent-child relationships
root_node = None
for span_node in span_map.values():
parent_id = span_node.span.parent_span_id
if parent_id and parent_id in span_map:
span_map[parent_id].children.append(span_node)
elif not parent_id:
root_node = span_node
trace = Trace(
trace_id=trace_id,
root_span_id=root_node.span.span_id if root_node else "",
start_time=(
root_node.span.start_time if root_node else datetime.now()
),
end_time=root_node.span.end_time if root_node else None,
)
return TraceTree(trace=trace, root=root_node)
except Exception as e:
raise Exception(f"Error querying Jaeger trace structure: {str(e)}") from e
async def get_traces_for_sessions(self, session_ids: List[str]) -> List[Trace]:
traces = []
# Fetch traces for each session ID individually
for session_id in session_ids:
params = {
"service": self.service_name,
"tags": f'{{"session_id":"{session_id}"}}',
"limit": 100,
"lookback": "10000h",
}
try:
async with aiohttp.ClientSession() as session:
async with session.get(self.endpoint, params=params) as response:
if response.status != 200:
raise Exception(
f"Failed to query Jaeger: {response.status} {await response.text()}"
)
traces_data = await response.json()
seen_trace_ids = set()
for trace_data in traces_data.get("data", []):
trace_id = trace_data.get("traceID")
if trace_id and trace_id not in seen_trace_ids:
seen_trace_ids.add(trace_id)
traces.append(
Trace(
trace_id=trace_id,
root_span_id="",
start_time=datetime.now(),
)
)
except Exception as e:
raise Exception(f"Error querying Jaeger traces: {str(e)}") from e
return traces
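
For context, a minimal usage sketch (not part of this commit) of the Jaeger store being removed above; the endpoint URL, service name, and session id are placeholders and assume a Jaeger query API reachable at something like http://localhost:16686/api/traces.

import asyncio

async def main():
    # Hypothetical endpoint and service name; adjust to your Jaeger deployment.
    store = JaegerTraceStore(
        endpoint="http://localhost:16686/api/traces",
        service_name="llama-stack",
    )
    # Fetch traces tagged with a session_id, then expand the first one into a tree.
    traces = await store.get_traces_for_sessions(["session-123"])
    if traces:
        tree = await store.get_trace(traces[0].trace_id)
        if tree and tree.root:
            print(tree.trace.trace_id, tree.root.span.name)

asyncio.run(main())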

@@ -1,114 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
from datetime import datetime
from typing import List, Optional
import psycopg2
from llama_stack.apis.telemetry import Span, SpanNode, Trace, TraceStore, TraceTree
class PostgresTraceStore(TraceStore):
def __init__(self, conn_string: str):
self.conn_string = conn_string
async def get_trace(self, trace_id: str) -> Optional[TraceTree]:
try:
with psycopg2.connect(self.conn_string) as conn:
with conn.cursor() as cur:
# Fetch all spans for the trace
cur.execute(
"""
SELECT trace_id, span_id, parent_span_id, name,
start_time, end_time, attributes
FROM traces
WHERE trace_id = %s
""",
(trace_id,),
)
spans_data = cur.fetchall()
if not spans_data:
return None
# First pass: Build span map
span_map = {}
for span_data in spans_data:
# Ensure attributes is a string before parsing
attributes = span_data[6]
if isinstance(attributes, dict):
attributes = json.dumps(attributes)
span = Span(
span_id=span_data[1],
trace_id=span_data[0],
name=span_data[3],
start_time=span_data[4],
end_time=span_data[5],
parent_span_id=span_data[2],
attributes=json.loads(
attributes
), # Now safely parse the JSON string
)
span_map[span.span_id] = SpanNode(span=span)
# Second pass: Build parent-child relationships
root_node = None
for span_node in span_map.values():
parent_id = span_node.span.parent_span_id
if parent_id and parent_id in span_map:
span_map[parent_id].children.append(span_node)
elif not parent_id:
root_node = span_node
trace = Trace(
trace_id=trace_id,
root_span_id=root_node.span.span_id if root_node else "",
start_time=(
root_node.span.start_time if root_node else datetime.now()
),
end_time=root_node.span.end_time if root_node else None,
)
return TraceTree(trace=trace, root=root_node)
except Exception as e:
raise Exception(
f"Error querying PostgreSQL trace structure: {str(e)}"
) from e
async def get_traces_for_sessions(self, session_ids: List[str]) -> List[Trace]:
traces = []
try:
with psycopg2.connect(self.conn_string) as conn:
with conn.cursor() as cur:
# Query traces for all session IDs
cur.execute(
"""
SELECT DISTINCT trace_id, MIN(start_time) as start_time
FROM traces
WHERE attributes->>'session_id' = ANY(%s)
GROUP BY trace_id
""",
(session_ids,),
)
traces_data = cur.fetchall()
for trace_data in traces_data:
traces.append(
Trace(
trace_id=trace_data[0],
root_span_id="",
start_time=trace_data[1],
)
)
except Exception as e:
raise Exception(f"Error querying PostgreSQL traces: {str(e)}") from e
return traces
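
Similarly, a minimal usage sketch (not part of this commit) for the Postgres store being removed; the DSN and session id are placeholders, and the schema is the traces table with a JSON attributes column queried above.

import asyncio

async def main():
    # Placeholder connection string; requires the `traces` table queried above.
    store = PostgresTraceStore(
        conn_string="postgresql://user:pass@localhost:5432/telemetry"
    )
    traces = await store.get_traces_for_sessions(["session-123"])
    for trace in traces:
        tree = await store.get_trace(trace.trace_id)
        if tree and tree.root:
            print(trace.trace_id, tree.root.span.name)

asyncio.run(main())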

@@ -0,0 +1,157 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
from datetime import datetime
from typing import List, Optional
import aiosqlite
from llama_stack.apis.telemetry import (
MaterializedSpan,
QueryCondition,
Trace,
TraceStore,
)
class SQLiteTraceStore(TraceStore):
def __init__(self, conn_string: str):
self.conn_string = conn_string
async def query_traces(
self,
attribute_conditions: Optional[List[QueryCondition]] = None,
attribute_keys_to_return: Optional[List[str]] = None,
limit: Optional[int] = 100,
offset: Optional[int] = 0,
order_by: Optional[List[str]] = None,
) -> List[Trace]:
# Build the SQL query with attribute selection
select_clause = """
SELECT DISTINCT t.trace_id, t.root_span_id, t.start_time, t.end_time
"""
if attribute_keys_to_return:
for key in attribute_keys_to_return:
select_clause += (
f", json_extract(s.attributes, '$.{key}') as attr_{key}"
)
query = (
select_clause
+ """
FROM traces t
JOIN spans s ON t.trace_id = s.trace_id
"""
)
params = []
# Add attribute conditions if present
if attribute_conditions:
conditions = []
for condition in attribute_conditions:
conditions.append(
f"json_extract(s.attributes, '$.{condition.key}') {condition.op} ?"
)
params.append(condition.value)
if conditions:
query += " WHERE " + " AND ".join(conditions)
# Add ordering
if order_by:
order_clauses = []
for field in order_by:
desc = False
if field.startswith("-"):
field = field[1:]
desc = True
order_clauses.append(f"t.{field} {'DESC' if desc else 'ASC'}")
query += " ORDER BY " + ", ".join(order_clauses)
# Add limit and offset
query += f" LIMIT {limit} OFFSET {offset}"
async with aiosqlite.connect(self.conn_string) as conn:
conn.row_factory = aiosqlite.Row
async with conn.execute(query, params) as cursor:
rows = await cursor.fetchall()
return [
Trace(
trace_id=row["trace_id"],
root_span_id=row["root_span_id"],
start_time=datetime.fromisoformat(row["start_time"]),
end_time=datetime.fromisoformat(row["end_time"]),
)
for row in rows
]
async def get_materialized_span(
self,
span_id: str,
attribute_keys_to_return: Optional[List[str]] = None,
max_depth: Optional[int] = None,
) -> MaterializedSpan:
# Build the attributes selection
attributes_select = "s.attributes"
if attribute_keys_to_return:
json_object = ", ".join(
f"'{key}', json_extract(s.attributes, '$.{key}')"
for key in attribute_keys_to_return
)
attributes_select = f"json_object({json_object})"
# SQLite CTE query with filtered attributes
query = f"""
WITH RECURSIVE span_tree AS (
SELECT s.*, 1 as depth, {attributes_select} as filtered_attributes
FROM spans s
WHERE s.span_id = ?
UNION ALL
SELECT s.*, st.depth + 1, {attributes_select} as filtered_attributes
FROM spans s
JOIN span_tree st ON s.parent_span_id = st.span_id
WHERE (? IS NULL OR st.depth < ?)
)
SELECT *
FROM span_tree
ORDER BY depth, start_time
"""
async with aiosqlite.connect(self.conn_string) as conn:
conn.row_factory = aiosqlite.Row
async with conn.execute(query, (span_id, max_depth, max_depth)) as cursor:
rows = await cursor.fetchall()
if not rows:
raise ValueError(f"Span {span_id} not found")
# Build span tree
spans_by_id = {}
root_span = None
for row in rows:
span = MaterializedSpan(
span_id=row["span_id"],
trace_id=row["trace_id"],
parent_span_id=row["parent_span_id"],
name=row["name"],
start_time=datetime.fromisoformat(row["start_time"]),
end_time=datetime.fromisoformat(row["end_time"]),
attributes=json.loads(row["filtered_attributes"]),
status=row["status"].lower(),
children=[],
)
spans_by_id[span.span_id] = span
if span.span_id == span_id:
root_span = span
elif span.parent_span_id in spans_by_id:
spans_by_id[span.parent_span_id].children.append(span)
return root_span
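
A minimal usage sketch (not part of this commit) for the new SQLite store; the database path and session id are placeholders, and the QueryCondition(key=..., op=..., value=...) constructor is an assumption inferred from how query_traces reads the condition fields.

import asyncio

from llama_stack.apis.telemetry import QueryCondition

async def main():
    store = SQLiteTraceStore(conn_string="trace_store.db")  # placeholder path

    # Most recent traces for a given session, newest first.
    traces = await store.query_traces(
        attribute_conditions=[
            QueryCondition(key="session_id", op="=", value="session-123")
        ],
        limit=10,
        order_by=["-start_time"],
    )
    if traces:
        # Materialize the span tree rooted at the first trace's root span.
        tree = await store.get_materialized_span(
            span_id=traces[0].root_span_id,
            max_depth=3,
        )
        print(tree.name, [child.name for child in tree.children])

asyncio.run(main())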