From 6389bf5ffb5ecc1d30a022cb74499f01d16418c1 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Tue, 7 Oct 2025 10:09:03 -0400 Subject: [PATCH] fix: make telemetry optional for agents (#3705) # What does this PR do? There is a lot of code in the agents API that uses the telemetry API and its helpers without checking whether that API is even enabled. This is the only API besides inference actively using telemetry code, so after this change, telemetry can be optional for the entire stack. Resolves #3665 ## Test Plan Existing agent tests. Signed-off-by: Charlie Doern --- .../inline/agents/meta_reference/__init__.py | 1 + .../agents/meta_reference/agent_instance.py | 86 +++++++++++-------- .../inline/agents/meta_reference/agents.py | 3 + llama_stack/providers/registry/agents.py | 3 + 4 files changed, 57 insertions(+), 36 deletions(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/llama_stack/providers/inline/agents/meta_reference/__init__.py index 334c32e15..37b0b50c8 100644 --- a/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -22,6 +22,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap deps[Api.tool_runtime], deps[Api.tool_groups], policy, + Api.telemetry in deps, ) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 207f0daec..c2ce9aa7b 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -110,6 +110,7 @@ class ChatAgent(ShieldRunnerMixin): persistence_store: KVStore, created_at: str, policy: list[AccessRule], + telemetry_enabled: bool = False, ): self.agent_id = agent_id self.agent_config = agent_config @@ -120,6 +121,7 @@ class ChatAgent(ShieldRunnerMixin): self.tool_runtime_api =
tool_runtime_api self.tool_groups_api = tool_groups_api self.created_at = created_at + self.telemetry_enabled = telemetry_enabled ShieldRunnerMixin.__init__( self, @@ -188,28 +190,30 @@ class ChatAgent(ShieldRunnerMixin): async def create_and_execute_turn(self, request: AgentTurnCreateRequest) -> AsyncGenerator: turn_id = str(uuid.uuid4()) - span = tracing.get_current_span() - if span: - span.set_attribute("session_id", request.session_id) - span.set_attribute("agent_id", self.agent_id) - span.set_attribute("request", request.model_dump_json()) - span.set_attribute("turn_id", turn_id) - if self.agent_config.name: - span.set_attribute("agent_name", self.agent_config.name) + if self.telemetry_enabled: + span = tracing.get_current_span() + if span is not None: + span.set_attribute("session_id", request.session_id) + span.set_attribute("agent_id", self.agent_id) + span.set_attribute("request", request.model_dump_json()) + span.set_attribute("turn_id", turn_id) + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) await self._initialize_tools(request.toolgroups) async for chunk in self._run_turn(request, turn_id): yield chunk async def resume_turn(self, request: AgentTurnResumeRequest) -> AsyncGenerator: - span = tracing.get_current_span() - if span: - span.set_attribute("agent_id", self.agent_id) - span.set_attribute("session_id", request.session_id) - span.set_attribute("request", request.model_dump_json()) - span.set_attribute("turn_id", request.turn_id) - if self.agent_config.name: - span.set_attribute("agent_name", self.agent_config.name) + if self.telemetry_enabled: + span = tracing.get_current_span() + if span is not None: + span.set_attribute("agent_id", self.agent_id) + span.set_attribute("session_id", request.session_id) + span.set_attribute("request", request.model_dump_json()) + span.set_attribute("turn_id", request.turn_id) + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) await 
self._initialize_tools() async for chunk in self._run_turn(request): @@ -395,9 +399,12 @@ class ChatAgent(ShieldRunnerMixin): touchpoint: str, ) -> AsyncGenerator: async with tracing.span("run_shields") as span: - span.set_attribute("input", [m.model_dump_json() for m in messages]) + if self.telemetry_enabled and span is not None: + span.set_attribute("input", [m.model_dump_json() for m in messages]) + if len(shields) == 0: + span.set_attribute("output", "no shields") + if len(shields) == 0: - span.set_attribute("output", "no shields") return step_id = str(uuid.uuid4()) @@ -430,7 +437,8 @@ class ChatAgent(ShieldRunnerMixin): ) ) ) - span.set_attribute("output", e.violation.model_dump_json()) + if self.telemetry_enabled and span is not None: + span.set_attribute("output", e.violation.model_dump_json()) yield CompletionMessage( content=str(e), @@ -453,7 +461,8 @@ class ChatAgent(ShieldRunnerMixin): ) ) ) - span.set_attribute("output", "no violations") + if self.telemetry_enabled and span is not None: + span.set_attribute("output", "no violations") async def _run( self, @@ -518,8 +527,9 @@ class ChatAgent(ShieldRunnerMixin): stop_reason: StopReason | None = None async with tracing.span("inference") as span: - if self.agent_config.name: - span.set_attribute("agent_name", self.agent_config.name) + if self.telemetry_enabled and span is not None: + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) def _serialize_nested(value): """Recursively serialize nested Pydantic models to dicts.""" @@ -637,18 +647,19 @@ class ChatAgent(ShieldRunnerMixin): else: raise ValueError(f"Unexpected delta type {type(delta)}") - span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn) - span.set_attribute( - "input", - json.dumps([json.loads(m.model_dump_json()) for m in input_messages]), - ) - output_attr = json.dumps( - { - "content": content, - "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls], - } - ) - 
span.set_attribute("output", output_attr) + if self.telemetry_enabled and span is not None: + span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn) + span.set_attribute( + "input", + json.dumps([json.loads(m.model_dump_json()) for m in input_messages]), + ) + output_attr = json.dumps( + { + "content": content, + "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls], + } + ) + span.set_attribute("output", output_attr) n_iter += 1 await self.storage.set_num_infer_iters_in_turn(session_id, turn_id, n_iter) @@ -756,7 +767,9 @@ class ChatAgent(ShieldRunnerMixin): { "tool_name": tool_call.tool_name, "input": message.model_dump_json(), - }, + } + if self.telemetry_enabled + else {}, ) as span: tool_execution_start_time = datetime.now(UTC).isoformat() tool_result = await self.execute_tool_call_maybe( @@ -771,7 +784,8 @@ class ChatAgent(ShieldRunnerMixin): call_id=tool_call.call_id, content=tool_result.content, ) - span.set_attribute("output", result_message.model_dump_json()) + if self.telemetry_enabled and span is not None: + span.set_attribute("output", result_message.model_dump_json()) # Store tool execution step tool_execution_step = ToolExecutionStep( diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 5431e8f28..cfaf56a34 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -64,6 +64,7 @@ class MetaReferenceAgentsImpl(Agents): tool_runtime_api: ToolRuntime, tool_groups_api: ToolGroups, policy: list[AccessRule], + telemetry_enabled: bool = False, ): self.config = config self.inference_api = inference_api @@ -71,6 +72,7 @@ class MetaReferenceAgentsImpl(Agents): self.safety_api = safety_api self.tool_runtime_api = tool_runtime_api self.tool_groups_api = tool_groups_api + self.telemetry_enabled = telemetry_enabled self.in_memory_store = InmemoryKVStoreImpl() 
self.openai_responses_impl: OpenAIResponsesImpl | None = None @@ -135,6 +137,7 @@ class MetaReferenceAgentsImpl(Agents): ), created_at=agent_info.created_at, policy=self.policy, + telemetry_enabled=self.telemetry_enabled, ) async def create_agent_session( diff --git a/llama_stack/providers/registry/agents.py b/llama_stack/providers/registry/agents.py index 57110d129..bc46b4de2 100644 --- a/llama_stack/providers/registry/agents.py +++ b/llama_stack/providers/registry/agents.py @@ -36,6 +36,9 @@ def available_providers() -> list[ProviderSpec]: Api.tool_runtime, Api.tool_groups, ], + optional_api_dependencies=[ + Api.telemetry, + ], description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.", ), ]