diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index ef5603420..042d4e781 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -1071,6 +1071,16 @@ class StreamingResponseOrchestrator:
                     # AllowedToolsFilter only has tool_names field (not allowed/disallowed)
                     always_allowed = mcp_tool.allowed_tools.tool_names
 
+            # Prepare headers for MCP server with JWT injection
+            headers = dict(mcp_tool.headers or {})
+
+            # Inject JWT token from request context if available
+            if self.ctx.request_context and self.ctx.request_context.jwt_token:
+                # Respect an explicit Authorization header; header names are case-insensitive
+                if not any(name.lower() == "authorization" for name in headers):
+                    headers["Authorization"] = self.ctx.request_context.jwt_token
+                    logger.info(f"JWT token forwarded to MCP server {mcp_tool.server_label}")
+
             # Call list_mcp_tools
             tool_defs = None
             list_id = f"mcp_list_{uuid.uuid4()}"
@@ -1082,7 +1092,7 @@ class StreamingResponseOrchestrator:
             async with tracing.span("list_mcp_tools", attributes):
                 tool_defs = await list_mcp_tools(
                     endpoint=mcp_tool.server_url,
-                    headers=mcp_tool.headers or {},
+                    headers=headers,
                 )
 
             # Create the MCP list tools message
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
index 09a161d50..c74cea1d2 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
@@ -294,6 +294,19 @@ class ToolExecutor:
                 from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool
 
                 mcp_tool = mcp_tool_to_server[function_name]
+
+                # Prepare headers for MCP tool invocation with JWT injection
+                headers = dict(mcp_tool.headers or {})
+
+                # Inject JWT token from request context if available
+                if ctx.request_context and ctx.request_context.jwt_token:
+                    # Respect an explicit Authorization header; header names are case-insensitive
+                    if not any(name.lower() == "authorization" for name in headers):
+                        headers["Authorization"] = ctx.request_context.jwt_token
+                        logger.info(
+                            f"JWT token forwarded to MCP server {mcp_tool.server_label} for tool {function_name}"
+                        )
+
                 attributes = {
                     "server_label": mcp_tool.server_label,
                     "server_url": mcp_tool.server_url,
@@ -302,7 +315,7 @@ class ToolExecutor:
                 async with tracing.span("invoke_mcp_tool", attributes):
                     result = await invoke_mcp_tool(
                         endpoint=mcp_tool.server_url,
-                        headers=mcp_tool.headers or {},
+                        headers=headers,
                         tool_name=function_name,
                         kwargs=tool_kwargs,
                     )
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
index 3b9a14b01..279f193c0 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
@@ -29,6 +29,20 @@ from llama_stack.apis.agents.openai_responses import (
 from llama_stack.apis.inference import OpenAIChatCompletionToolCall, OpenAIMessageParam, OpenAIResponseFormatParam
 
 
+class RequestContext(BaseModel):
+    """Context information from incoming HTTP request.
+
+    This holds authentication and other request-specific metadata
+    that needs to be passed through the execution chain.
+    """
+
+    jwt_token: str | None = None
+    """JWT token extracted from Authorization header for forwarding to MCP servers."""
+
+    additional_headers: dict[str, str] | None = None
+    """Additional headers that may need to be forwarded."""
+
+
 class ToolExecutionResult(BaseModel):
     """Result of streaming tool execution."""
 
@@ -158,6 +172,7 @@ class ChatCompletionContext(BaseModel):
     temperature: float | None
     response_format: OpenAIResponseFormatParam
     tool_context: ToolContext | None
+    request_context: RequestContext | None = None
     approval_requests: list[OpenAIResponseMCPApprovalRequest] = []
     approval_responses: dict[str, OpenAIResponseMCPApprovalResponse] = {}
 
@@ -170,6 +185,7 @@ class ChatCompletionContext(BaseModel):
         response_format: OpenAIResponseFormatParam,
         tool_context: ToolContext,
         inputs: list[OpenAIResponseInput] | str,
+        request_context: RequestContext | None = None,
     ):
         super().__init__(
             model=model,
@@ -178,6 +194,7 @@ class ChatCompletionContext(BaseModel):
             temperature=temperature,
             response_format=response_format,
             tool_context=tool_context,
+            request_context=request_context,
         )
         if not isinstance(inputs, str):
             self.approval_requests = [input for input in inputs if input.type == "mcp_approval_request"]
diff --git a/tests/integration/responses/test_mcp_jwt_passthrough.py b/tests/integration/responses/test_mcp_jwt_passthrough.py
new file mode 100644
index 000000000..db87e86e4
--- /dev/null
+++ b/tests/integration/responses/test_mcp_jwt_passthrough.py
@@ -0,0 +1,134 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+import pytest
+
+from llama_stack import LlamaStackAsLibraryClient
+from tests.common.mcp import make_mcp_server
+
+from .helpers import setup_mcp_tools
+
+
+def test_mcp_jwt_passthrough_basic(compat_client, text_model_id):
+    """
+    Test that a bearer token reaches the MCP server for both list_tools and invoke_tool.
+
+    NOTE(review): the token is set via the tool's explicit headers, so RequestContext injection is not exercised.
+    """
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
+    # Create MCP server that requires JWT authentication
+    test_token = "test-jwt-token-123"
+    with make_mcp_server(required_auth_token=test_token) as mcp_server_info:
+        tools = setup_mcp_tools(
+            [
+                {
+                    "type": "mcp",
+                    "server_label": "localmcp",
+                    "server_url": "",
+                    "headers": {"Authorization": f"Bearer {test_token}"},
+                }
+            ],
+            mcp_server_info,
+        )
+
+        # Create response - JWT should be forwarded
+        response = compat_client.responses.create(
+            model=text_model_id,
+            input="What is the boiling point of myawesomeliquid?",
+            tools=tools,
+            stream=False,
+        )
+
+        # Verify list_tools succeeded
+        assert len(response.output) >= 3
+        assert response.output[0].type == "mcp_list_tools"
+        assert response.output[0].server_label == "localmcp"
+        assert len(response.output[0].tools) == 2
+
+        # Verify tool invocation succeeded
+        assert response.output[1].type == "mcp_call"
+        assert response.output[1].name == "get_boiling_point"
+        assert response.output[1].error is None
+        assert "-100" in response.output[1].output
+
+
+def test_mcp_jwt_passthrough_backward_compatibility(compat_client, text_model_id):
+    """
+    Test that MCP tools work without JWT (backward compatibility).
+
+    This ensures systems without JWT continue working as before.
+    """
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
+    # Create MCP server WITHOUT authentication requirement
+    with make_mcp_server(required_auth_token=None) as mcp_server_info:
+        tools = setup_mcp_tools(
+            [{"type": "mcp", "server_label": "localmcp", "server_url": ""}], mcp_server_info
+        )
+
+        # Create response without JWT - should still work
+        response = compat_client.responses.create(
+            model=text_model_id,
+            input="What is the boiling point of myawesomeliquid?",
+            tools=tools,
+            stream=False,
+        )
+
+        # Verify operation succeeded without JWT
+        assert len(response.output) >= 3
+        assert response.output[0].type == "mcp_list_tools"
+        assert response.output[1].type == "mcp_call"
+        assert response.output[1].error is None
+
+
+def test_mcp_jwt_passthrough_streaming(compat_client, text_model_id):
+    """
+    Test that a bearer token set in the tool headers reaches the MCP server during streaming responses.
+    """
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
+    # Create MCP server that requires JWT authentication
+    test_token = "test-streaming-jwt"
+    with make_mcp_server(required_auth_token=test_token) as mcp_server_info:
+        tools = setup_mcp_tools(
+            [
+                {
+                    "type": "mcp",
+                    "server_label": "localmcp",
+                    "server_url": "",
+                    "headers": {"Authorization": f"Bearer {test_token}"},
+                }
+            ],
+            mcp_server_info,
+        )
+
+        # Create streaming response - JWT should be forwarded
+        stream = compat_client.responses.create(
+            model=text_model_id,
+            input="What is the boiling point of myawesomeliquid?",
+            tools=tools,
+            stream=True,
+        )
+
+        # Collect all streaming chunks
+        chunks = list(stream)
+        assert len(chunks) > 0
+
+        # Get final response
+        final_chunk = chunks[-1]
+        assert hasattr(final_chunk, "response")
+        final_response = final_chunk.response
+
+        # Verify MCP operations succeeded
+        assert len(final_response.output) >= 3
+        assert final_response.output[0].type == "mcp_list_tools"
+        assert final_response.output[1].type == "mcp_call"
+        assert final_response.output[1].error is None