Original telemetry outputs for agent turns looked like this. Note how `output` was a `str(message)`, making it difficult to read back for downstream tasks (e.g. building eval datasets):

```
{
│   │   'input': [
│   │   │   '{"role":"system","content":"You are a helpful assistant. Use search tool to answer the questions. "}',
│   │   │   '{"role":"user","content":"Which teams played in the NBA western conference finals of 2024","context":null}'
│   │   ],
│   │   'output': "content: tool_calls: [ToolCall(call_id='8b7294ec-a83f-4798-ad8f-6bed662f08b6', tool_name=<BuiltinTool.brave_search: 'brave_search'>, arguments={'query': 'NBA Western Conference Finals 2024 teams'})]"
│   },
```

Updated the outputs to be structured.

## Test

```python
import uuid

from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types.agent_create_params import AgentConfig

# Assumes `client` is an initialized LlamaStackClient and `pprint` is rich.pretty.pprint.
model_id = "meta-llama/Llama-3.1-8B-Instruct"

agent_config = AgentConfig(
    model=model_id,
    instructions="You are a helpful assistant who will use the web search tools to help with answering questions.\nOnly provide final answer in short without writing full sentences. Use web search",
    toolgroups=["builtin::websearch"],
    enable_session_persistence=True,
)

agent = Agent(client, agent_config)
session_id = agent.create_session(uuid.uuid4().hex)

response = agent.create_turn(
    messages=[
        {
            "role": "user",
            "content": "latest news about llama stack",
        }
    ],
    session_id=session_id,
    stream=False,
)

pprint(response)
```

Output:

```
Turn(
│   input_messages=[UserMessage(content='latest news about llama stack', role='user', context=None)],
│   output_message=CompletionMessage(
│   │   content="The latest news about Llama Stack is that Meta has released Llama 3.2, which includes small and medium-sized vision LLMs (11B and 90B) and lightweight, text-only models (1B and 3B) that fit onto select edge and mobile devices. Additionally, Llama Stack distributions have been released to simplify the way developers work with Llama models in different environments. 
However, a critical vulnerability has been discovered in Meta's Llama-Stack, which puts AI applications at risk.",
│   │   role='assistant',
│   │   stop_reason='end_of_turn',
│   │   tool_calls=[]
│   ),
│   session_id='77379546-4598-485a-b4f4-84e5da28c513',
│   started_at=datetime.datetime(2025, 2, 27, 11, 2, 43, 915243, tzinfo=TzInfo(-08:00)),
│   steps=[
│   │   InferenceStep(
│   │   │   api_model_response=CompletionMessage(
│   │   │   │   content='',
│   │   │   │   role='assistant',
│   │   │   │   stop_reason='end_of_turn',
│   │   │   │   tool_calls=[
│   │   │   │   │   ToolCall(
│   │   │   │   │   │   arguments={'query': 'latest news llama stack'},
│   │   │   │   │   │   call_id='84c0fa10-e24a-4f91-a9ff-415a9ec0bb0b',
│   │   │   │   │   │   tool_name='brave_search'
│   │   │   │   │   )
│   │   │   │   ]
│   │   │   ),
│   │   │   step_id='81c16bd3-eb00-4721-8edc-f386e07391a3',
│   │   │   step_type='inference',
│   │   │   turn_id='2c6b5273-4b16-404f-bed2-c0025fd63b45',
│   │   │   completed_at=datetime.datetime(2025, 2, 27, 11, 2, 44, 637149, tzinfo=TzInfo(-08:00)),
│   │   │   started_at=datetime.datetime(2025, 2, 27, 11, 2, 43, 915831, tzinfo=TzInfo(-08:00))
│   │   ),
│   │   ToolExecutionStep(
│   │   │   step_id='4782d609-a62e-45f5-8d2a-25a43db46288',
│   │   │   step_type='tool_execution',
│   │   │   tool_calls=[
│   │   │   │   ToolCall(
│   │   │   │   │   arguments={'query': 'latest news llama stack'},
│   │   │   │   │   call_id='84c0fa10-e24a-4f91-a9ff-415a9ec0bb0b',
│   │   │   │   │   tool_name='brave_search'
│   │   │   │   )
│   │   │   ],
│   │   │   tool_responses=[
│   │   │   │   ToolResponse(
│   │   │   │   │   call_id='84c0fa10-e24a-4f91-a9ff-415a9ec0bb0b',
│   │   │   │   │   content='{"query": "latest news llama stack", "top_k": [{"title": "Llama 3.2: Revol. ....... Hacker News.", "score": 0.6186197, "raw_content": null}]}',
│   │   │   │   │   tool_name='brave_search',
│   │   │   │   │   metadata=None
│   │   │   │   )
│   │   │   ],
│   │   │   turn_id='2c6b5273-4b16-404f-bed2-c0025fd63b45',
│   │   │   completed_at=datetime.datetime(2025, 2, 27, 11, 2, 46, 272176, tzinfo=TzInfo(-08:00)),
│   │   │   started_at=datetime.datetime(2025, 2, 27, 11, 2, 44, 640743, tzinfo=TzInfo(-08:00))
│   │   ),
│   │   InferenceStep(
│   │   │   api_model_response=CompletionMessage(
│   │   │   │   content="The latest news about Llama Stack is that Meta has released Llama 3.2, which includes small and medium-sized vision LLMs (11B and 90B) and lightweight, text-only models (1B and 3B) that fit onto select edge and mobile devices. Additionally, Llama Stack distributions have been released to simplify the way developers work with Llama models in different environments. 
However, a critical vulnerability has been discovered in Meta's Llama-Stack, which puts AI applications at risk.",
│   │   │   │   role='assistant',
│   │   │   │   stop_reason='end_of_turn',
│   │   │   │   tool_calls=[]
│   │   │   ),
│   │   │   step_id='37994419-5da3-4e84-a010-8d9b85366262',
│   │   │   step_type='inference',
│   │   │   turn_id='2c6b5273-4b16-404f-bed2-c0025fd63b45',
│   │   │   completed_at=datetime.datetime(2025, 2, 27, 11, 2, 48, 961275, tzinfo=TzInfo(-08:00)),
│   │   │   started_at=datetime.datetime(2025, 2, 27, 11, 2, 46, 273168, tzinfo=TzInfo(-08:00))
│   │   )
│   ],
│   turn_id='2c6b5273-4b16-404f-bed2-c0025fd63b45',
│   completed_at=datetime.datetime(2025, 2, 27, 11, 2, 48, 962318, tzinfo=TzInfo(-08:00)),
│   output_attachments=[]
)
```

## Check for Telemetry

```python
import json

agent_logs = []

for span in client.telemetry.query_spans(
    attribute_filters=[
        {"key": "session_id", "op": "eq", "value": session_id},
    ],
    attributes_to_return=['input', 'output'],
):
    agent_logs.append(span.attributes)

pprint(json.loads(agent_logs[-1]['output']))
```

The `output` attribute now parses back into a structured message:

```
{
│   'content': "The latest news about Llama Stack is that Meta has released Llama 3.2, which includes small and medium-sized vision LLMs (11B and 90B) and lightweight, text-only models (1B and 3B) that fit onto select edge and mobile devices. Additionally, Llama Stack distributions have been released to simplify the way developers work with Llama models in different environments. However, a critical vulnerability has been discovered in Meta's Llama-Stack, which puts AI applications at risk.",
│   'tool_calls': []
}
```
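Since the span's `output` attribute is now valid JSON, it can be read straight back into structured records, e.g. for building eval datasets. A minimal sketch, assuming the `client` and `session_id` from the snippets above and span attributes shaped as shown (the `spans_to_eval_rows` helper is hypothetical):

```python
import json


def spans_to_eval_rows(spans) -> list[dict]:
    """Hypothetical helper: turn queried spans into eval-dataset rows."""
    rows = []
    for span in spans:
        attrs = span.attributes
        if "input" not in attrs or "output" not in attrs:
            continue
        rows.append(
            {
                # 'output' is a JSON string of the structured message, as shown above.
                "output": json.loads(attrs["output"]),
                # 'input' is kept as-is here; its exact shape (string vs. list of
                # JSON strings) may vary by span type.
                "input": attrs["input"],
            }
        )
    return rows


eval_rows = spans_to_eval_rows(
    client.telemetry.query_spans(
        attribute_filters=[{"key": "session_id", "op": "eq", "value": session_id}],
        attributes_to_return=["input", "output"],
    )
)
```

The change itself lives in the telemetry tracing utility reproduced below: `serialize_value` is what turns method arguments and results into span attribute values.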
```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import asyncio
import inspect
import json
from functools import wraps
from typing import Any, AsyncGenerator, Callable, Type, TypeVar

from pydantic import BaseModel

from llama_stack.models.llama.datatypes import Primitive

T = TypeVar("T")


def serialize_value(value: Any) -> Primitive:
    return str(_prepare_for_json(value))


def _prepare_for_json(value: Any) -> Any:
    """Serialize a single value into JSON-compatible format."""
    if value is None:
        return ""
    elif isinstance(value, (str, int, float, bool)):
        return value
    elif hasattr(value, "_name_"):
        # Enum members expose _name_; serialize them by name.
        return value._name_
    elif isinstance(value, BaseModel):
        return json.loads(value.model_dump_json())
    elif isinstance(value, (list, tuple, set)):
        return [_prepare_for_json(item) for item in value]
    elif isinstance(value, dict):
        return {str(k): _prepare_for_json(v) for k, v in value.items()}
    else:
        try:
            json.dumps(value)
            return value
        except Exception:
            return str(value)
```
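As a quick illustration of the serialization rules above, a sketch of the common cases (the `Color` enum and `Point` model are made up for this example):

```python
from enum import Enum

from pydantic import BaseModel


class Color(Enum):
    RED = 1


class Point(BaseModel):
    x: int
    y: int


_prepare_for_json(None)             # ""
_prepare_for_json(Color.RED)        # "RED" (enums are matched via their _name_ attribute)
_prepare_for_json(Point(x=1, y=2))  # {"x": 1, "y": 2}
_prepare_for_json([1, "a", None])   # [1, "a", ""]
serialize_value(Point(x=1, y=2))    # "{'x': 1, 'y': 2}" (span attribute values are plain strings)
```

The `trace_protocol` decorator below is what attaches these serialized values to spans: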
```python
def trace_protocol(cls: Type[T]) -> Type[T]:
    """
    A class decorator that automatically traces all methods in a protocol/base class
    and its inheriting classes.
    """

    def trace_method(method: Callable) -> Callable:
        is_async = asyncio.iscoroutinefunction(method)
        is_async_gen = inspect.isasyncgenfunction(method)

        def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple:
            class_name = self.__class__.__name__
            method_name = method.__name__
            span_type = "async_generator" if is_async_gen else "async" if is_async else "sync"
            sig = inspect.signature(method)
            param_names = list(sig.parameters.keys())[1:]  # Skip 'self'
            combined_args = {}
            for i, arg in enumerate(args):
                param_name = param_names[i] if i < len(param_names) else f"position_{i + 1}"
                combined_args[param_name] = serialize_value(arg)
            for k, v in kwargs.items():
                combined_args[str(k)] = serialize_value(v)

            span_attributes = {
                "__autotraced__": True,
                "__class__": class_name,
                "__method__": method_name,
                "__type__": span_type,
                "__args__": str(combined_args),
            }

            return class_name, method_name, span_attributes

        @wraps(method)
        async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator:
            # Local import so the tracing module is only required at call time.
            from llama_stack.providers.utils.telemetry import tracing

            class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)

            with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
                try:
                    count = 0
                    async for item in method(self, *args, **kwargs):
                        yield item
                        count += 1
                finally:
                    span.set_attribute("chunk_count", count)

        @wraps(method)
        async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
            from llama_stack.providers.utils.telemetry import tracing

            class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)

            with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
                try:
                    result = await method(self, *args, **kwargs)
                    span.set_attribute("output", serialize_value(result))
                    return result
                except Exception as e:
                    span.set_attribute("error", str(e))
                    raise

        @wraps(method)
        def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
            from llama_stack.providers.utils.telemetry import tracing

            class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)

            with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
                try:
                    result = method(self, *args, **kwargs)
                    span.set_attribute("output", serialize_value(result))
                    return result
                except Exception as e:
                    span.set_attribute("error", str(e))
                    raise

        if is_async_gen:
            return async_gen_wrapper
        elif is_async:
            return async_wrapper
        else:
            return sync_wrapper

    original_init_subclass = getattr(cls, "__init_subclass__", None)

    # Hook subclass creation so every new subclass gets its public methods traced.
    def __init_subclass__(cls_child, **kwargs):  # noqa: N807
        if original_init_subclass:
            original_init_subclass(**kwargs)

        for name, method in vars(cls_child).items():
            if inspect.isfunction(method) and not name.startswith("_"):
                setattr(cls_child, name, trace_method(method))  # noqa: B010

    cls.__init_subclass__ = classmethod(__init_subclass__)

    return cls
```
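For context, the decorator is applied to a protocol/base class; any subclass defined afterwards gets its public methods wrapped automatically via the patched `__init_subclass__`. A minimal sketch (the class and method names here are illustrative, not from the codebase):

```python
@trace_protocol
class InferenceProtocol:
    pass


class MyInference(InferenceProtocol):
    # Wrapped at class-definition time: each call produces a span named
    # "MyInference.chat_completion" with __args__ captured on entry and
    # "output" (via serialize_value) recorded on return.
    async def chat_completion(self, messages: list) -> dict:
        return {"content": "hello"}
```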