From f596f850bf7e5213e16eb494d91ca77e6480efd2 Mon Sep 17 00:00:00 2001
From: slekkala1
Date: Fri, 14 Nov 2025 13:14:49 -0800
Subject: [PATCH] fix: Propagate the runtime error message to user (#4150)

# What does this PR do?
When a runtime exception occurs, its error message is not propagated to the user, so the failure can be opaque.

Before fix:
`ERROR - Error processing message: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}`

After fix:
`[ERROR] Error code: 404 - {'detail': "Model 'claude-sonnet-4-5-20250929' not found. Use 'client.models.list()' to list available Models."}`

(Ran into this a few times while working on OCI + LLAMAStack and Sabre agentic-framework integrations with LLAMAStack.)

## Test Plan
CI
---
 .../inline/agents/meta_reference/responses/streaming.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index c0b62958f..95c690147 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -16,6 +16,7 @@ from llama_stack_api import (
     ApprovalFilter,
     Inference,
     MCPListToolsTool,
+    ModelNotFoundError,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
@@ -323,6 +324,8 @@ class StreamingResponseOrchestrator:
 
             if last_completion_result and last_completion_result.finish_reason == "length":
                 final_status = "incomplete"
+        except ModelNotFoundError:
+            raise
         except Exception as exc:  # noqa: BLE001
             self.final_messages = messages.copy()
             self.sequence_number += 1
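
The fix relies on Python matching `except` clauses in order: re-raising the specific `ModelNotFoundError` before the broad `except Exception` handler opts it out of the generic error path, so the HTTP layer can surface the real 404 instead of a generic 500. Below is a minimal, self-contained sketch of that pattern; apart from the `ModelNotFoundError` name, all classes and functions here are hypothetical stand-ins, not the actual llama-stack code.

```python
# Sketch of the narrow-before-broad re-raise pattern (hypothetical names).


class ModelNotFoundError(Exception):
    """Stand-in for the llama_stack_api exception (mapped to HTTP 404 upstream)."""


def run_inference(model: str) -> None:
    # Hypothetical inference call that fails like a missing-model lookup.
    raise ModelNotFoundError(f"Model '{model}' not found.")


def orchestrate(model: str) -> None:
    try:
        run_inference(model)
    except ModelNotFoundError:
        # except clauses are tried in order: re-raising here lets this
        # specific error bypass the broad handler below, so the server's
        # error mapping can return a 404 with the real message.
        raise
    except Exception as exc:  # noqa: BLE001
        # Without the clause above, ModelNotFoundError would land here
        # and be reported as an opaque "Internal server error".
        print(f"swallowed: {exc!r}")


try:
    orchestrate("claude-sonnet-4-5-20250929")
except ModelNotFoundError as err:
    print(f"propagated to caller: {err}")
```

An alternative would be to inspect the exception type inside the broad handler, but a dedicated narrow clause keeps the catch-all logic untouched and makes the pass-through intent explicit.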