From 6389bf5ffb5ecc1d30a022cb74499f01d16418c1 Mon Sep 17 00:00:00 2001
From: Charlie Doern <cdoern@redhat.com>
Date: Tue, 7 Oct 2025 10:09:03 -0400
Subject: [PATCH] fix: make telemetry optional for agents (#3705)

# What does this PR do?

there is a lot of code in the agents API using the telemetry API and its
helpers without checking if that API is even enabled.

This is the only API besides inference actively using telemetry code, so
after this telemetry can be optional for the entire stack


resolves #3665


## Test Plan

existing agent tests.

Signed-off-by: Charlie Doern <cdoern@redhat.com>
---
 .../inline/agents/meta_reference/__init__.py  |  1 +
 .../agents/meta_reference/agent_instance.py   | 86 +++++++++++--------
 .../inline/agents/meta_reference/agents.py    |  3 +
 llama_stack/providers/registry/agents.py      |  3 +
 4 files changed, 57 insertions(+), 36 deletions(-)

diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/llama_stack/providers/inline/agents/meta_reference/__init__.py
index 334c32e15..37b0b50c8 100644
--- a/llama_stack/providers/inline/agents/meta_reference/__init__.py
+++ b/llama_stack/providers/inline/agents/meta_reference/__init__.py
@@ -22,6 +22,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap
         deps[Api.tool_runtime],
         deps[Api.tool_groups],
         policy,
+        Api.telemetry in deps,
     )
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index 207f0daec..c2ce9aa7b 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -110,6 +110,7 @@ class ChatAgent(ShieldRunnerMixin):
         persistence_store: KVStore,
         created_at: str,
         policy: list[AccessRule],
+        telemetry_enabled: bool = False,
     ):
         self.agent_id = agent_id
         self.agent_config = agent_config
@@ -120,6 +121,7 @@ class ChatAgent(ShieldRunnerMixin):
         self.tool_runtime_api = tool_runtime_api
         self.tool_groups_api = tool_groups_api
         self.created_at = created_at
+        self.telemetry_enabled = telemetry_enabled
 
         ShieldRunnerMixin.__init__(
             self,
@@ -188,28 +190,30 @@ class ChatAgent(ShieldRunnerMixin):
 
     async def create_and_execute_turn(self, request: AgentTurnCreateRequest) -> AsyncGenerator:
         turn_id = str(uuid.uuid4())
-        span = tracing.get_current_span()
-        if span:
-            span.set_attribute("session_id", request.session_id)
-            span.set_attribute("agent_id", self.agent_id)
-            span.set_attribute("request", request.model_dump_json())
-            span.set_attribute("turn_id", turn_id)
-            if self.agent_config.name:
-                span.set_attribute("agent_name", self.agent_config.name)
+        if self.telemetry_enabled:
+            span = tracing.get_current_span()
+            if span is not None:
+                span.set_attribute("session_id", request.session_id)
+                span.set_attribute("agent_id", self.agent_id)
+                span.set_attribute("request", request.model_dump_json())
+                span.set_attribute("turn_id", turn_id)
+                if self.agent_config.name:
+                    span.set_attribute("agent_name", self.agent_config.name)
 
         await self._initialize_tools(request.toolgroups)
         async for chunk in self._run_turn(request, turn_id):
             yield chunk
 
     async def resume_turn(self, request: AgentTurnResumeRequest) -> AsyncGenerator:
-        span = tracing.get_current_span()
-        if span:
-            span.set_attribute("agent_id", self.agent_id)
-            span.set_attribute("session_id", request.session_id)
-            span.set_attribute("request", request.model_dump_json())
-            span.set_attribute("turn_id", request.turn_id)
-            if self.agent_config.name:
-                span.set_attribute("agent_name", self.agent_config.name)
+        if self.telemetry_enabled:
+            span = tracing.get_current_span()
+            if span is not None:
+                span.set_attribute("agent_id", self.agent_id)
+                span.set_attribute("session_id", request.session_id)
+                span.set_attribute("request", request.model_dump_json())
+                span.set_attribute("turn_id", request.turn_id)
+                if self.agent_config.name:
+                    span.set_attribute("agent_name", self.agent_config.name)
 
         await self._initialize_tools()
         async for chunk in self._run_turn(request):
@@ -395,9 +399,12 @@ class ChatAgent(ShieldRunnerMixin):
         touchpoint: str,
     ) -> AsyncGenerator:
         async with tracing.span("run_shields") as span:
-            span.set_attribute("input", [m.model_dump_json() for m in messages])
+            if self.telemetry_enabled and span is not None:
+                span.set_attribute("input", [m.model_dump_json() for m in messages])
+                if len(shields) == 0:
+                    span.set_attribute("output", "no shields")
+
             if len(shields) == 0:
-                span.set_attribute("output", "no shields")
                 return
 
             step_id = str(uuid.uuid4())
@@ -430,7 +437,8 @@ class ChatAgent(ShieldRunnerMixin):
                         )
                     )
                 )
-                span.set_attribute("output", e.violation.model_dump_json())
+                if self.telemetry_enabled and span is not None:
+                    span.set_attribute("output", e.violation.model_dump_json())
 
                 yield CompletionMessage(
                     content=str(e),
@@ -453,7 +461,8 @@ class ChatAgent(ShieldRunnerMixin):
                     )
                 )
             )
-            span.set_attribute("output", "no violations")
+            if self.telemetry_enabled and span is not None:
+                span.set_attribute("output", "no violations")
 
     async def _run(
         self,
@@ -518,8 +527,9 @@ class ChatAgent(ShieldRunnerMixin):
             stop_reason: StopReason | None = None
 
             async with tracing.span("inference") as span:
-                if self.agent_config.name:
-                    span.set_attribute("agent_name", self.agent_config.name)
+                if self.telemetry_enabled and span is not None:
+                    if self.agent_config.name:
+                        span.set_attribute("agent_name", self.agent_config.name)
 
                 def _serialize_nested(value):
                     """Recursively serialize nested Pydantic models to dicts."""
@@ -637,18 +647,19 @@ class ChatAgent(ShieldRunnerMixin):
                     else:
                         raise ValueError(f"Unexpected delta type {type(delta)}")
 
-                span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn)
-                span.set_attribute(
-                    "input",
-                    json.dumps([json.loads(m.model_dump_json()) for m in input_messages]),
-                )
-                output_attr = json.dumps(
-                    {
-                        "content": content,
-                        "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
-                    }
-                )
-                span.set_attribute("output", output_attr)
+                if self.telemetry_enabled and span is not None:
+                    span.set_attribute("stop_reason", stop_reason or StopReason.end_of_turn)
+                    span.set_attribute(
+                        "input",
+                        json.dumps([json.loads(m.model_dump_json()) for m in input_messages]),
+                    )
+                    output_attr = json.dumps(
+                        {
+                            "content": content,
+                            "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
+                        }
+                    )
+                    span.set_attribute("output", output_attr)
 
             n_iter += 1
             await self.storage.set_num_infer_iters_in_turn(session_id, turn_id, n_iter)
@@ -756,7 +767,9 @@ class ChatAgent(ShieldRunnerMixin):
                         {
                             "tool_name": tool_call.tool_name,
                             "input": message.model_dump_json(),
-                        },
+                        }
+                        if self.telemetry_enabled
+                        else {},
                     ) as span:
                         tool_execution_start_time = datetime.now(UTC).isoformat()
                         tool_result = await self.execute_tool_call_maybe(
@@ -771,7 +784,8 @@ class ChatAgent(ShieldRunnerMixin):
                             call_id=tool_call.call_id,
                             content=tool_result.content,
                         )
-                        span.set_attribute("output", result_message.model_dump_json())
+                        if self.telemetry_enabled and span is not None:
+                            span.set_attribute("output", result_message.model_dump_json())
 
                         # Store tool execution step
                         tool_execution_step = ToolExecutionStep(
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py
index 5431e8f28..cfaf56a34 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -64,6 +64,7 @@ class MetaReferenceAgentsImpl(Agents):
         tool_runtime_api: ToolRuntime,
         tool_groups_api: ToolGroups,
         policy: list[AccessRule],
+        telemetry_enabled: bool = False,
     ):
         self.config = config
         self.inference_api = inference_api
@@ -71,6 +72,7 @@ class MetaReferenceAgentsImpl(Agents):
         self.safety_api = safety_api
         self.tool_runtime_api = tool_runtime_api
         self.tool_groups_api = tool_groups_api
+        self.telemetry_enabled = telemetry_enabled
 
         self.in_memory_store = InmemoryKVStoreImpl()
         self.openai_responses_impl: OpenAIResponsesImpl | None = None
@@ -135,6 +137,7 @@ class MetaReferenceAgentsImpl(Agents):
             ),
             created_at=agent_info.created_at,
             policy=self.policy,
+            telemetry_enabled=self.telemetry_enabled,
         )
 
     async def create_agent_session(
diff --git a/llama_stack/providers/registry/agents.py b/llama_stack/providers/registry/agents.py
index 57110d129..bc46b4de2 100644
--- a/llama_stack/providers/registry/agents.py
+++ b/llama_stack/providers/registry/agents.py
@@ -36,6 +36,9 @@ def available_providers() -> list[ProviderSpec]:
                 Api.tool_runtime,
                 Api.tool_groups,
             ],
+            optional_api_dependencies=[
+                Api.telemetry,
+            ],
             description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
         ),
     ]