chore(responses): Refactor Responses Impl to be civilized (#3138)

# What does this PR do?

Refactors the OpenAI responses implementation by extracting streaming and tool execution logic into separate modules. This improves code organization by:

1. Creating a new `StreamingResponseOrchestrator` class in `streaming.py` to handle the streaming response generation logic
2. Moving tool execution functionality to a dedicated `ToolExecutor` class in `tool_executor.py`

## Test Plan

Existing tests
2025-12-04 02:03:44 +00:00 · 2025-08-15 00:05:35 +00:00 · 2025-08-15 00:05:35 +00:00 · 47d5af703c
commit 47d5af703c
parent e69acbafbf
10 changed files with 1434 additions and 1156 deletions
--- a/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -48,8 +48,8 @@ from llama_stack.providers.utils.responses.responses_store import ResponsesStore

 from .agent_instance import ChatAgent
 from .config import MetaReferenceAgentsImplConfig
-from .openai_responses import OpenAIResponsesImpl
 from .persistence import AgentInfo
+from .responses.openai_responses import OpenAIResponsesImpl

 logger = logging.getLogger()