forked from phoenix-oss/llama-stack-mirror
# What does this PR do? Change the Telemetry API to be able to support different use cases like returning traces for the UI and ability to export for Evals. Other changes: * Add a new trace_protocol decorator to decorate all our API methods so that any call to them will automatically get traced across all impls. * There is some issue with the decorator pattern of span creation when using async generators, where there are multiple yields with in the same context. I think its much more explicit by using the explicit context manager pattern using with. I moved the span creations in agent instance to be using with * Inject session id at the turn level, which should quickly give us all traces across turns for a given session Addresses #509 ## Test Plan ``` llama stack run /Users/dineshyv/.llama/distributions/llamastack-together/together-run.yaml PYTHONPATH=. python -m examples.agents.rag_with_memory_bank localhost 5000 curl -X POST 'http://localhost:5000/alpha/telemetry/query-traces' \ -H 'Content-Type: application/json' \ -d '{ "attribute_filters": [ { "key": "session_id", "op": "eq", "value": "dd667b87-ca4b-4d30-9265-5a0de318fc65" }], "limit": 100, "offset": 0, "order_by": ["start_time"] }' | jq . [ { "trace_id": "6902f54b83b4b48be18a6f422b13e16f", "root_span_id": "5f37b85543afc15a", "start_time": "2024-12-04T08:08:30.501587", "end_time": "2024-12-04T08:08:36.026463" }, { "trace_id": "92227dac84c0615ed741be393813fb5f", "root_span_id": "af7c5bb46665c2c8", "start_time": "2024-12-04T08:08:36.031170", "end_time": "2024-12-04T08:08:41.693301" }, { "trace_id": "7d578a6edac62f204ab479fba82f77b6", "root_span_id": "1d935e3362676896", "start_time": "2024-12-04T08:08:41.695204", "end_time": "2024-12-04T08:08:47.228016" }, { "trace_id": "dbd767d76991bc816f9f078907dc9ff2", "root_span_id": "f5a7ee76683b9602", "start_time": "2024-12-04T08:08:47.234578", "end_time": "2024-12-04T08:08:53.189412" } ] curl -X POST 'http://localhost:5000/alpha/telemetry/get-span-tree' \ -H 'Content-Type: application/json' \ -d '{ "span_id" : "6cceb4b48a156913", "max_depth": 2, "attributes_to_return": ["input"] }' | jq . % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 875 100 790 100 85 18462 1986 --:--:-- --:--:-- --:--:-- 20833 { "span_id": "6cceb4b48a156913", "trace_id": "dafa796f6aaf925f511c04cd7c67fdda", "parent_span_id": "892a66d726c7f990", "name": "retrieve_rag_context", "start_time": "2024-12-04T09:28:21.781995", "end_time": "2024-12-04T09:28:21.913352", "attributes": { "input": [ "{\"role\":\"system\",\"content\":\"You are a helpful assistant\"}", "{\"role\":\"user\",\"content\":\"What are the top 5 topics that were explained in the documentation? Only list succinct bullet points.\",\"context\":null}" ] }, "children": [ { "span_id": "1a2df181854064a8", "trace_id": "dafa796f6aaf925f511c04cd7c67fdda", "parent_span_id": "6cceb4b48a156913", "name": "MemoryRouter.query_documents", "start_time": "2024-12-04T09:28:21.787620", "end_time": "2024-12-04T09:28:21.906512", "attributes": { "input": null }, "children": [], "status": "ok" } ], "status": "ok" } ``` <img width="1677" alt="Screenshot 2024-12-04 at 9 42 56 AM" src="https://github.com/user-attachments/assets/4d3cea93-05ce-415a-93d9-4b1628631bf8">
128 lines
4.4 KiB
Python
128 lines
4.4 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
import asyncio
|
|
import inspect
|
|
import json
|
|
from functools import wraps
|
|
from typing import Any, AsyncGenerator, Callable, Type, TypeVar
|
|
|
|
from pydantic import BaseModel
|
|
|
|
from llama_stack.providers.utils.telemetry import tracing
|
|
|
|
T = TypeVar("T")
|
|
|
|
|
|
def serialize_value(value: Any) -> str:
|
|
"""Helper function to serialize values to string representation."""
|
|
try:
|
|
if isinstance(value, BaseModel):
|
|
return value.model_dump_json()
|
|
elif isinstance(value, list) and value and isinstance(value[0], BaseModel):
|
|
return json.dumps([item.model_dump_json() for item in value])
|
|
elif hasattr(value, "to_dict"):
|
|
return json.dumps(value.to_dict())
|
|
elif isinstance(value, (dict, list, int, float, str, bool)):
|
|
return json.dumps(value)
|
|
else:
|
|
return str(value)
|
|
except Exception:
|
|
return str(value)
|
|
|
|
|
|
def trace_protocol(cls: Type[T]) -> Type[T]:
|
|
"""
|
|
A class decorator that automatically traces all methods in a protocol/base class
|
|
and its inheriting classes.
|
|
"""
|
|
|
|
def trace_method(method: Callable) -> Callable:
|
|
is_async = asyncio.iscoroutinefunction(method)
|
|
is_async_gen = inspect.isasyncgenfunction(method)
|
|
|
|
def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple:
|
|
class_name = self.__class__.__name__
|
|
method_name = method.__name__
|
|
|
|
span_type = (
|
|
"async_generator" if is_async_gen else "async" if is_async else "sync"
|
|
)
|
|
span_attributes = {
|
|
"class": class_name,
|
|
"method": method_name,
|
|
"type": span_type,
|
|
"args": serialize_value(args),
|
|
}
|
|
|
|
return class_name, method_name, span_attributes
|
|
|
|
@wraps(method)
|
|
async def async_gen_wrapper(
|
|
self: Any, *args: Any, **kwargs: Any
|
|
) -> AsyncGenerator:
|
|
class_name, method_name, span_attributes = create_span_context(
|
|
self, *args, **kwargs
|
|
)
|
|
|
|
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
|
|
try:
|
|
count = 0
|
|
async for item in method(self, *args, **kwargs):
|
|
yield item
|
|
count += 1
|
|
finally:
|
|
span.set_attribute("chunk_count", count)
|
|
|
|
@wraps(method)
|
|
async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
|
|
class_name, method_name, span_attributes = create_span_context(
|
|
self, *args, **kwargs
|
|
)
|
|
|
|
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
|
|
try:
|
|
result = await method(self, *args, **kwargs)
|
|
span.set_attribute("output", serialize_value(result))
|
|
return result
|
|
except Exception as e:
|
|
span.set_attribute("error", str(e))
|
|
raise
|
|
|
|
@wraps(method)
|
|
def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
|
|
class_name, method_name, span_attributes = create_span_context(
|
|
self, *args, **kwargs
|
|
)
|
|
|
|
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
|
|
try:
|
|
result = method(self, *args, **kwargs)
|
|
span.set_attribute("output", serialize_value(result))
|
|
return result
|
|
except Exception as e:
|
|
raise
|
|
|
|
if is_async_gen:
|
|
return async_gen_wrapper
|
|
elif is_async:
|
|
return async_wrapper
|
|
else:
|
|
return sync_wrapper
|
|
|
|
original_init_subclass = getattr(cls, "__init_subclass__", None)
|
|
|
|
def __init_subclass__(cls_child, **kwargs): # noqa: N807
|
|
if original_init_subclass:
|
|
original_init_subclass(**kwargs)
|
|
|
|
for name, method in vars(cls_child).items():
|
|
if inspect.isfunction(method) and not name.startswith("_"):
|
|
setattr(cls_child, name, trace_method(method)) # noqa: B010
|
|
|
|
cls.__init_subclass__ = classmethod(__init_subclass__)
|
|
|
|
return cls
|