llama-stack-mirror/llama_stack/providers/utils/telemetry/trace_protocol.py
Hardik Shah 8efa53daf1
fix: Agent telemetry inputs/outputs should be structured (#1302)
Original telemetry outputs for agent turns look like this. 
Note: how output was a `str(message)` making it difficult to read them
back for downstream tasks ( eg. building eval datasets )
```
{
│   │   'input': [
│   │   │   '{"role":"system","content":"You are a helpful assistant. Use search tool to answer the questions. "}',
│   │   │   '{"role":"user","content":"Which teams played in the NBA western conference finals of 2024","context":null}'
│   │   ],
│   │   'output': "content:  tool_calls: [ToolCall(call_id='8b7294ec-a83f-4798-ad8f-6bed662f08b6', tool_name=<BuiltinTool.brave_search: 'brave_search'>, arguments={'query': 'NBA Western Conference Finals 2024 teams'})]"
│   },
``` 

Updated the outputs to be structured .

## Test 

```python
import uuid

from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types.agent_create_params import AgentConfig

model_id = "meta-llama/Llama-3.1-8B-Instruct"
agent_config = AgentConfig(
    model=model_id,
    instructions="You are a helpful assistant who will use the web search tools to help with answering questions.\nOnly provide final answer in short without writing full sentences. Use web search",
    toolgroups=["builtin::websearch"],
    enable_session_persistence=True,
)

agent = Agent(client, agent_config)

session_id = agent.create_session(uuid.uuid4().hex)
response = agent.create_turn(
    messages=[
        {
            "role": "user",
            "content": "latest news about llama stack",
        }
    ],
    session_id=session_id,
    stream=False,
)

pprint(response)
```
Output: 
```
Turn(
│   input_messages=[UserMessage(content='latest news about llama stack', role='user', context=None)],
│   output_message=CompletionMessage(
│   │   content="The latest news about Llama Stack is that Meta has released Llama 3.2, which includes small and medium-sized vision LLMs (11B and 90B) and lightweight, text-only models (1B and 3B) that fit onto select edge and mobile devices. Additionally, Llama Stack distributions have been released to simplify the way developers work with Llama models in different environments. However, a critical vulnerability has been discovered in Meta's Llama-Stack, which puts AI applications at risk.",
│   │   role='assistant',
│   │   stop_reason='end_of_turn',
│   │   tool_calls=[]
│   ),
│   session_id='77379546-4598-485a-b4f4-84e5da28c513',
│   started_at=datetime.datetime(2025, 2, 27, 11, 2, 43, 915243, tzinfo=TzInfo(-08:00)),
│   steps=[
│   │   InferenceStep(
│   │   │   api_model_response=CompletionMessage(
│   │   │   │   content='',
│   │   │   │   role='assistant',
│   │   │   │   stop_reason='end_of_turn',
│   │   │   │   tool_calls=[
│   │   │   │   │   ToolCall(
│   │   │   │   │   │   arguments={'query': 'latest news llama stack'},
│   │   │   │   │   │   call_id='84c0fa10-e24a-4f91-a9ff-415a9ec0bb0b',
│   │   │   │   │   │   tool_name='brave_search'
│   │   │   │   │   )
│   │   │   │   ]
│   │   │   ),
│   │   │   step_id='81c16bd3-eb00-4721-8edc-f386e07391a3',
│   │   │   step_type='inference',
│   │   │   turn_id='2c6b5273-4b16-404f-bed2-c0025fd63b45',
│   │   │   completed_at=datetime.datetime(2025, 2, 27, 11, 2, 44, 637149, tzinfo=TzInfo(-08:00)),
│   │   │   started_at=datetime.datetime(2025, 2, 27, 11, 2, 43, 915831, tzinfo=TzInfo(-08:00))
│   │   ),
│   │   ToolExecutionStep(
│   │   │   step_id='4782d609-a62e-45f5-8d2a-25a43db46288',
│   │   │   step_type='tool_execution',
│   │   │   tool_calls=[
│   │   │   │   ToolCall(
│   │   │   │   │   arguments={'query': 'latest news llama stack'},
│   │   │   │   │   call_id='84c0fa10-e24a-4f91-a9ff-415a9ec0bb0b',
│   │   │   │   │   tool_name='brave_search'
│   │   │   │   )
│   │   │   ],
│   │   │   tool_responses=[
│   │   │   │   ToolResponse(
│   │   │   │   │   call_id='84c0fa10-e24a-4f91-a9ff-415a9ec0bb0b',
│   │   │   │   │   content='{"query": "latest news llama stack", "top_k": [{"title": "Llama 3.2: Revol. .......  Hacker News.", "score": 0.6186197, "raw_content": null}]}',
│   │   │   │   │   tool_name='brave_search',
│   │   │   │   │   metadata=None
│   │   │   │   )
│   │   │   ],
│   │   │   turn_id='2c6b5273-4b16-404f-bed2-c0025fd63b45',
│   │   │   completed_at=datetime.datetime(2025, 2, 27, 11, 2, 46, 272176, tzinfo=TzInfo(-08:00)),
│   │   │   started_at=datetime.datetime(2025, 2, 27, 11, 2, 44, 640743, tzinfo=TzInfo(-08:00))
│   │   ),
│   │   InferenceStep(
│   │   │   api_model_response=CompletionMessage(
│   │   │   │   content="The latest news about Llama Stack is that Meta has released Llama 3.2, which includes small and medium-sized vision LLMs (11B and 90B) and lightweight, text-only models (1B and 3B) that fit onto select edge and mobile devices. Additionally, Llama Stack distributions have been released to simplify the way developers work with Llama models in different environments. However, a critical vulnerability has been discovered in Meta's Llama-Stack, which puts AI applications at risk.",
│   │   │   │   role='assistant',
│   │   │   │   stop_reason='end_of_turn',
│   │   │   │   tool_calls=[]
│   │   │   ),
│   │   │   step_id='37994419-5da3-4e84-a010-8d9b85366262',
│   │   │   step_type='inference',
│   │   │   turn_id='2c6b5273-4b16-404f-bed2-c0025fd63b45',
│   │   │   completed_at=datetime.datetime(2025, 2, 27, 11, 2, 48, 961275, tzinfo=TzInfo(-08:00)),
│   │   │   started_at=datetime.datetime(2025, 2, 27, 11, 2, 46, 273168, tzinfo=TzInfo(-08:00))
│   │   )
│   ],
│   turn_id='2c6b5273-4b16-404f-bed2-c0025fd63b45',
│   completed_at=datetime.datetime(2025, 2, 27, 11, 2, 48, 962318, tzinfo=TzInfo(-08:00)),
│   output_attachments=[]
)

```

## Check for Telemetry 
```python 

agent_logs = []
for span in client.telemetry.query_spans(
    attribute_filters=[
      {"key": "session_id", "op": "eq", "value": session_id},
    ],
    attributes_to_return=['input', 'output'],
):
    agent_logs.append(span.attributes)

pprint(json.loads(agent_logs[-1]['output']))
```
```
{
│   'content': "The latest news about Llama Stack is that Meta has released Llama 3.2, which includes small and medium-sized vision LLMs (11B and 90B) and lightweight, text-only models (1B and 3B) that fit onto select edge and mobile devices. Additionally, Llama Stack distributions have been released to simplify the way developers work with Llama models in different environments. However, a critical vulnerability has been discovered in Meta's Llama-Stack, which puts AI applications at risk.",
│   'tool_calls': []
}
```
2025-02-27 23:06:37 -08:00

143 lines
5.2 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import asyncio
import inspect
import json
from functools import wraps
from typing import Any, AsyncGenerator, Callable, Type, TypeVar
from pydantic import BaseModel
from llama_stack.models.llama.datatypes import Primitive
T = TypeVar("T")
def serialize_value(value: Any) -> Primitive:
return str(_prepare_for_json(value))
def _prepare_for_json(value: Any) -> str:
"""Serialize a single value into JSON-compatible format."""
if value is None:
return ""
elif isinstance(value, (str, int, float, bool)):
return value
elif hasattr(value, "_name_"):
return value._name_
elif isinstance(value, BaseModel):
return json.loads(value.model_dump_json())
elif isinstance(value, (list, tuple, set)):
return [_prepare_for_json(item) for item in value]
elif isinstance(value, dict):
return {str(k): _prepare_for_json(v) for k, v in value.items()}
else:
try:
json.dumps(value)
return value
except Exception:
return str(value)
def trace_protocol(cls: Type[T]) -> Type[T]:
"""
A class decorator that automatically traces all methods in a protocol/base class
and its inheriting classes.
"""
def trace_method(method: Callable) -> Callable:
is_async = asyncio.iscoroutinefunction(method)
is_async_gen = inspect.isasyncgenfunction(method)
def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple:
class_name = self.__class__.__name__
method_name = method.__name__
span_type = "async_generator" if is_async_gen else "async" if is_async else "sync"
sig = inspect.signature(method)
param_names = list(sig.parameters.keys())[1:] # Skip 'self'
combined_args = {}
for i, arg in enumerate(args):
param_name = param_names[i] if i < len(param_names) else f"position_{i + 1}"
combined_args[param_name] = serialize_value(arg)
for k, v in kwargs.items():
combined_args[str(k)] = serialize_value(v)
span_attributes = {
"__autotraced__": True,
"__class__": class_name,
"__method__": method_name,
"__type__": span_type,
"__args__": str(combined_args),
}
return class_name, method_name, span_attributes
@wraps(method)
async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator:
from llama_stack.providers.utils.telemetry import tracing
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
try:
count = 0
async for item in method(self, *args, **kwargs):
yield item
count += 1
finally:
span.set_attribute("chunk_count", count)
@wraps(method)
async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
from llama_stack.providers.utils.telemetry import tracing
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
try:
result = await method(self, *args, **kwargs)
span.set_attribute("output", serialize_value(result))
return result
except Exception as e:
span.set_attribute("error", str(e))
raise
@wraps(method)
def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
from llama_stack.providers.utils.telemetry import tracing
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
try:
result = method(self, *args, **kwargs)
span.set_attribute("output", serialize_value(result))
return result
except Exception as e:
span.set_attribute("error", str(e))
raise
if is_async_gen:
return async_gen_wrapper
elif is_async:
return async_wrapper
else:
return sync_wrapper
original_init_subclass = getattr(cls, "__init_subclass__", None)
def __init_subclass__(cls_child, **kwargs): # noqa: N807
if original_init_subclass:
original_init_subclass(**kwargs)
for name, method in vars(cls_child).items():
if inspect.isfunction(method) and not name.startswith("_"):
setattr(cls_child, name, trace_method(method)) # noqa: B010
cls.__init_subclass__ = classmethod(__init_subclass__)
return cls