diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 026ce6395..daac995ca 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -47,6 +47,7 @@ from llama_stack.apis.inference import (
     OpenAIMessageParam,
 )
 from llama_stack.log import get_logger
+from llama_stack.providers.utils.telemetry import tracing

 from .types import ChatCompletionContext, ChatCompletionResult
 from .utils import convert_chat_choice_to_response_message, is_function_tool_call
@@ -597,14 +598,22 @@ class StreamingResponseOrchestrator:
             never_allowed = mcp_tool.allowed_tools.never

         # Call list_mcp_tools
-        tool_defs = await list_mcp_tools(
-            endpoint=mcp_tool.server_url,
-            headers=mcp_tool.headers or {},
-        )
+        tool_defs = None
+        list_id = f"mcp_list_{uuid.uuid4()}"
+        attributes = {
+            "server_label": mcp_tool.server_label,
+            "server_url": mcp_tool.server_url,
+            "mcp_list_tools_id": list_id,
+        }
+        async with tracing.span("list_mcp_tools", attributes):
+            tool_defs = await list_mcp_tools(
+                endpoint=mcp_tool.server_url,
+                headers=mcp_tool.headers or {},
+            )

         # Create the MCP list tools message
         mcp_list_message = OpenAIResponseOutputMessageMCPListTools(
-            id=f"mcp_list_{uuid.uuid4()}",
+            id=list_id,
             server_label=mcp_tool.server_label,
             tools=[],
         )
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
index b33b47454..0a9967fe5 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
@@ -35,6 +35,7 @@ from llama_stack.apis.inference import (
 from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.log import get_logger
+from llama_stack.providers.utils.telemetry import tracing

 from .types import ChatCompletionContext, ToolExecutionResult
@@ -251,12 +252,18 @@ class ToolExecutor:
                 from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool

                 mcp_tool = mcp_tool_to_server[function_name]
-                result = await invoke_mcp_tool(
-                    endpoint=mcp_tool.server_url,
-                    headers=mcp_tool.headers or {},
-                    tool_name=function_name,
-                    kwargs=tool_kwargs,
-                )
+                attributes = {
+                    "server_label": mcp_tool.server_label,
+                    "server_url": mcp_tool.server_url,
+                    "tool_name": function_name,
+                }
+                async with tracing.span("invoke_mcp_tool", attributes):
+                    result = await invoke_mcp_tool(
+                        endpoint=mcp_tool.server_url,
+                        headers=mcp_tool.headers or {},
+                        tool_name=function_name,
+                        kwargs=tool_kwargs,
+                    )
             elif function_name == "knowledge_search":
                 response_file_search_tool = next(
                     (t for t in ctx.response_tools if isinstance(t, OpenAIResponseInputToolFileSearch)),
@@ -266,15 +273,20 @@ class ToolExecutor:
                     # Use vector_stores.search API instead of knowledge_search tool
                     # to support filters and ranking_options
                     query = tool_kwargs.get("query", "")
-                    result = await self._execute_knowledge_search_via_vector_store(
-                        query=query,
-                        response_file_search_tool=response_file_search_tool,
-                    )
+                    async with tracing.span("knowledge_search", {}):
+                        result = await self._execute_knowledge_search_via_vector_store(
+                            query=query,
+                            response_file_search_tool=response_file_search_tool,
+                        )
             else:
-                result = await self.tool_runtime_api.invoke_tool(
-                    tool_name=function_name,
-                    kwargs=tool_kwargs,
-                )
+                attributes = {
+                    "tool_name": function_name,
+                }
+                async with tracing.span("invoke_tool", attributes):
+                    result = await self.tool_runtime_api.invoke_tool(
+                        tool_name=function_name,
+                        kwargs=tool_kwargs,
+                    )
         except Exception as e:
             error_exc = e
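
All four edits apply the same pattern: build an attributes dict, then wrap the awaited call in a tracing.span async context manager so each MCP tool listing and tool invocation is recorded with its server label, URL, and tool name. Below is a minimal standalone sketch of that pattern, assuming tracing.span behaves as an async context manager exactly as used in the patch; the traced_invoke function and the fake_invoke stub are illustrative names, not part of the change.

import asyncio

from llama_stack.providers.utils.telemetry import tracing


async def fake_invoke(tool_name: str, kwargs: dict) -> dict:
    # Hypothetical stand-in for invoke_mcp_tool / invoke_tool.
    return {"tool": tool_name, "ok": True}


async def traced_invoke(tool_name: str, kwargs: dict) -> dict:
    # Attributes attached to the span, mirroring the patch above.
    attributes = {"tool_name": tool_name}
    # The span closes when the block exits; an exception raised by the
    # wrapped call still propagates to the caller after the span ends.
    async with tracing.span("invoke_tool", attributes):
        return await fake_invoke(tool_name, kwargs)


asyncio.run(traced_invoke("knowledge_search", {"query": "tracing"}))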