From f596f850bf7e5213e16eb494d91ca77e6480efd2 Mon Sep 17 00:00:00 2001
From: slekkala1
Date: Fri, 14 Nov 2025 13:14:49 -0800
Subject: [PATCH] fix: Propagate the runtime error message to user (#4150)

# What does this PR do?
When a runtime exception occurs, its error message is not propagated to the user, so the failure can be opaque.

Before fix:
`ERROR - Error processing message: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}`

After fix:
`[ERROR] Error code: 404 - {'detail': "Model 'claude-sonnet-4-5-20250929' not found. Use 'client.models.list()' to list available Models."}`

(Ran into this a few times while working on OCI + LLAMAStack and Sabre agentic-framework integrations with LLAMAStack.)

## Test Plan
CI
---
 .../inline/agents/meta_reference/responses/streaming.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index c0b62958f..95c690147 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -16,6 +16,7 @@ from llama_stack_api import (
     ApprovalFilter,
     Inference,
     MCPListToolsTool,
+    ModelNotFoundError,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
@@ -323,6 +324,8 @@ class StreamingResponseOrchestrator:
 
             if last_completion_result and last_completion_result.finish_reason == "length":
                 final_status = "incomplete"
+        except ModelNotFoundError:
+            raise
         except Exception as exc:  # noqa: BLE001
             self.final_messages = messages.copy()
             self.sequence_number += 1
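
The fix relies on Python matching `except` clauses in order: re-raising the specific `ModelNotFoundError` before the broad `except Exception` handler opts it out of the generic error path, so the HTTP layer can surface the real 404 instead of a generic 500. Below is a minimal, self-contained sketch of that pattern; apart from the `ModelNotFoundError` name, all classes and functions here are hypothetical stand-ins, not the actual llama-stack code.

```python
# Sketch of the narrow-before-broad re-raise pattern (hypothetical names).


class ModelNotFoundError(Exception):
    """Stand-in for the llama_stack_api exception (mapped to HTTP 404 upstream)."""


def run_inference(model: str) -> None:
    # Hypothetical inference call that fails like a missing-model lookup.
    raise ModelNotFoundError(f"Model '{model}' not found.")


def orchestrate(model: str) -> None:
    try:
        run_inference(model)
    except ModelNotFoundError:
        # except clauses are tried in order: re-raising here lets this
        # specific error bypass the broad handler below, so the server's
        # error mapping can return a 404 with the real message.
        raise
    except Exception as exc:  # noqa: BLE001
        # Without the clause above, ModelNotFoundError would land here
        # and be reported as an opaque "Internal server error".
        print(f"swallowed: {exc!r}")


try:
    orchestrate("claude-sonnet-4-5-20250929")
except ModelNotFoundError as err:
    print(f"propagated to caller: {err}")
```

An alternative would be to inspect the exception type inside the broad handler, but a dedicated narrow clause keeps the catch-all logic untouched and makes the pass-through intent explicit.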