diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/llama_stack/providers/inline/agents/meta_reference/__init__.py index 334c32e15..37b0b50c8 100644 --- a/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -22,6 +22,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap deps[Api.tool_runtime], deps[Api.tool_groups], policy, + Api.telemetry in deps, ) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 207f0daec..c2ce9aa7b 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -110,6 +110,7 @@ class ChatAgent(ShieldRunnerMixin): persistence_store: KVStore, created_at: str, policy: list[AccessRule], + telemetry_enabled: bool = False, ): self.agent_id = agent_id self.agent_config = agent_config @@ -120,6 +121,7 @@ class ChatAgent(ShieldRunnerMixin): self.tool_runtime_api = tool_runtime_api self.tool_groups_api = tool_groups_api self.created_at = created_at + self.telemetry_enabled = telemetry_enabled ShieldRunnerMixin.__init__( self, @@ -188,28 +190,30 @@ class ChatAgent(ShieldRunnerMixin): async def create_and_execute_turn(self, request: AgentTurnCreateRequest) -> AsyncGenerator: turn_id = str(uuid.uuid4()) - span = tracing.get_current_span() - if span: - span.set_attribute("session_id", request.session_id) - span.set_attribute("agent_id", self.agent_id) - span.set_attribute("request", request.model_dump_json()) - span.set_attribute("turn_id", turn_id) - if self.agent_config.name: - span.set_attribute("agent_name", self.agent_config.name) + if self.telemetry_enabled: + span = tracing.get_current_span() + if span is not None: + span.set_attribute("session_id", request.session_id) + span.set_attribute("agent_id", self.agent_id) + span.set_attribute("request", request.model_dump_json()) + span.set_attribute("turn_id", turn_id) + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) await self._initialize_tools(request.toolgroups) async for chunk in self._run_turn(request, turn_id): yield chunk async def resume_turn(self, request: AgentTurnResumeRequest) -> AsyncGenerator: - span = tracing.get_current_span() - if span: - span.set_attribute("agent_id", self.agent_id) - span.set_attribute("session_id", request.session_id) - span.set_attribute("request", request.model_dump_json()) - span.set_attribute("turn_id", request.turn_id) - if self.agent_config.name: - span.set_attribute("agent_name", self.agent_config.name) + if self.telemetry_enabled: + span = tracing.get_current_span() + if span is not None: + span.set_attribute("agent_id", self.agent_id) + span.set_attribute("session_id", request.session_id) + span.set_attribute("request", request.model_dump_json()) + span.set_attribute("turn_id", request.turn_id) + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) await self._initialize_tools() async for chunk in self._run_turn(request): @@ -395,9 +399,12 @@ class ChatAgent(ShieldRunnerMixin): touchpoint: str, ) -> AsyncGenerator: async with tracing.span("run_shields") as span: - span.set_attribute("input", [m.model_dump_json() for m in messages]) + if self.telemetry_enabled and span is not None: + span.set_attribute("input", [m.model_dump_json() for m in messages]) + if len(shields) == 0: + span.set_attribute("output", "no shields") + if len(shields) == 0: - span.set_attribute("output", "no shields") return step_id = str(uuid.uuid4()) @@ -430,7 +437,8 @@ class ChatAgent(ShieldRunnerMixin): ) ) ) - span.set_attribute("output", e.violation.model_dump_json()) + if self.telemetry_enabled and span is not None: + span.set_attribute("output", e.violation.model_dump_json()) yield CompletionMessage( content=str(e), @@ -453,7 +461,8 @@ class ChatAgent(ShieldRunnerMixin): ) ) ) - span.set_attribute("output", "no violations") + if self.telemetry_enabled and span is not None: + span.set_attribute("output", "no violations") async def _run( self, @@ -518,8 +527,9 @@ class ChatAgent(ShieldRunnerMixin): stop_reason: StopReason | None = None async with tracing.span("inference") as span: - if self.agent_config.name: - span.set_attribute("agent_name", self.agent_config.name) + if self.telemetry_enabled and span is not None: + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) def _serialize_nested(value): """Recursively serialize nested Pydantic models to dicts.""" @@ -637,18 +647,19 @@ class ChatAgent(ShieldRunnerMixin): else: raise ValueError(f"Unexpected delta type {type(delta)}") - span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn) - span.set_attribute( - "input", - json.dumps([json.loads(m.model_dump_json()) for m in input_messages]), - ) - output_attr = json.dumps( - { - "content": content, - "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls], - } - ) - span.set_attribute("output", output_attr) + if self.telemetry_enabled and span is not None: + span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn) + span.set_attribute( + "input", + json.dumps([json.loads(m.model_dump_json()) for m in input_messages]), + ) + output_attr = json.dumps( + { + "content": content, + "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls], + } + ) + span.set_attribute("output", output_attr) n_iter += 1 await self.storage.set_num_infer_iters_in_turn(session_id, turn_id, n_iter) @@ -756,7 +767,9 @@ class ChatAgent(ShieldRunnerMixin): { "tool_name": tool_call.tool_name, "input": message.model_dump_json(), - }, + } + if self.telemetry_enabled + else {}, ) as span: tool_execution_start_time = datetime.now(UTC).isoformat() tool_result = await self.execute_tool_call_maybe( @@ -771,7 +784,8 @@ class ChatAgent(ShieldRunnerMixin): call_id=tool_call.call_id, content=tool_result.content, ) - span.set_attribute("output", result_message.model_dump_json()) + if self.telemetry_enabled and span is not None: + span.set_attribute("output", result_message.model_dump_json()) # Store tool execution step tool_execution_step = ToolExecutionStep( diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 5431e8f28..cfaf56a34 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -64,6 +64,7 @@ class MetaReferenceAgentsImpl(Agents): tool_runtime_api: ToolRuntime, tool_groups_api: ToolGroups, policy: list[AccessRule], + telemetry_enabled: bool = False, ): self.config = config self.inference_api = inference_api @@ -71,6 +72,7 @@ class MetaReferenceAgentsImpl(Agents): self.safety_api = safety_api self.tool_runtime_api = tool_runtime_api self.tool_groups_api = tool_groups_api + self.telemetry_enabled = telemetry_enabled self.in_memory_store = InmemoryKVStoreImpl() self.openai_responses_impl: OpenAIResponsesImpl | None = None @@ -135,6 +137,7 @@ class MetaReferenceAgentsImpl(Agents): ), created_at=agent_info.created_at, policy=self.policy, + telemetry_enabled=self.telemetry_enabled, ) async def create_agent_session( diff --git a/llama_stack/providers/registry/agents.py b/llama_stack/providers/registry/agents.py index 57110d129..bc46b4de2 100644 --- a/llama_stack/providers/registry/agents.py +++ b/llama_stack/providers/registry/agents.py @@ -36,6 +36,9 @@ def available_providers() -> list[ProviderSpec]: Api.tool_runtime, Api.tool_groups, ], + optional_api_dependencies=[ + Api.telemetry, + ], description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.", ), ]