diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index c0b62958f..95c690147 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -16,6 +16,7 @@ from llama_stack_api import (
     ApprovalFilter,
     Inference,
     MCPListToolsTool,
+    ModelNotFoundError,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
@@ -323,6 +324,8 @@ class StreamingResponseOrchestrator:
 
             if last_completion_result and last_completion_result.finish_reason == "length":
                 final_status = "incomplete"
+        except ModelNotFoundError:
+            raise
         except Exception as exc:  # noqa: BLE001
             self.final_messages = messages.copy()
             self.sequence_number += 1