Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-03 01:48:05 +00:00

Commit 189251b56f (parent c40597bee3): "rootcause and just throw"

5 changed files with 4 additions and 102 deletions
Deleted file: llama_stack/core/server/runtime_error_sanitizer.py
@@ -1,68 +0,0 @@

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import re
from collections.abc import Iterable
from dataclasses import dataclass

from llama_stack.log import get_logger

logger = get_logger(name=__name__)


@dataclass(frozen=True)
class RuntimeErrorRule:
    code: str
    default_message: str
    regex: re.Pattern[str] | None = None
    template: str | None = None

    def evaluate(self, error_msg: str) -> str | None:
        """Return the sanitized message if this rule matches, otherwise None."""
        if self.regex and (match := self.regex.search(error_msg)):
            if self.template:
                try:
                    return self.template.format(**match.groupdict())
                except Exception:  # pragma: no cover - defensive
                    logger.debug("Failed to format sanitized runtime error message", exc_info=True)
            return self.default_message

        return None


@dataclass(frozen=True)
class SanitizedRuntimeError:
    code: str
    message: str


MODEL_NOT_FOUND_REGEX = re.compile(r"model ['\"]?(?P<model>[^'\" ]+)['\"]? not found", re.IGNORECASE)


RUNTIME_ERROR_RULES: tuple[RuntimeErrorRule, ...] = (
    RuntimeErrorRule(
        code="MODEL_NOT_FOUND",
        default_message="Requested model is unavailable.",
        regex=MODEL_NOT_FOUND_REGEX,
        template="Requested model '{model}' is unavailable.",
    ),
)


def sanitize_runtime_error(
    error: RuntimeError, rules: Iterable[RuntimeErrorRule] = RUNTIME_ERROR_RULES
) -> SanitizedRuntimeError | None:
    """
    Map internal RuntimeError messages to stable, user-safe error codes/messages.
    """
    message = str(error)

    for rule in rules:
        sanitized_message = rule.evaluate(message)
        if sanitized_message:
            return SanitizedRuntimeError(code=rule.code, message=sanitized_message)

    return None
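The removed MODEL_NOT_FOUND rule relies on a small, self-contained trick: a named regex group is fed straight into str.format via match.groupdict(). A minimal standalone sketch of that mechanic, reusing the error string from the deleted test at the bottom of this diff (runnable on its own, no llama-stack imports needed):

import re

# Same pattern and template as the removed MODEL_NOT_FOUND rule above.
MODEL_NOT_FOUND_REGEX = re.compile(r"model ['\"]?(?P<model>[^'\" ]+)['\"]? not found", re.IGNORECASE)
TEMPLATE = "Requested model '{model}' is unavailable."

raw = "OpenAI response failed: Model 'claude-sonnet-4-5-20250929' not found."
match = MODEL_NOT_FOUND_REGEX.search(raw)
if match:
    # groupdict() -> {"model": "claude-sonnet-4-5-20250929"}
    print(TEMPLATE.format(**match.groupdict()))
    # Requested model 'claude-sonnet-4-5-20250929' is unavailable.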
Server module (request handling / exception translation):
@@ -45,7 +45,6 @@ from llama_stack.core.request_headers import (
     user_from_scope,
 )
 from llama_stack.core.server.routes import get_all_api_routes
-from llama_stack.core.server.runtime_error_sanitizer import sanitize_runtime_error
 from llama_stack.core.stack import (
     Stack,
     cast_image_name_to_string,

@@ -138,16 +137,9 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationError:
         detail = str(exc)
         return HTTPException(status_code=status_code, detail=detail)
     else:
-        detail = "Internal server error: An unexpected error occurred."
-        if isinstance(exc, RuntimeError):
-            sanitized = sanitize_runtime_error(exc)
-            if sanitized:
-                logger.warning("RuntimeError sanitized as %s: %s", sanitized.code, str(exc))
-                detail = f"{sanitized.code}: {sanitized.message}"
-
         return HTTPException(
             status_code=httpx.codes.INTERNAL_SERVER_ERROR,
-            detail=detail,
+            detail="Internal server error: An unexpected error occurred.",
         )
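With the sanitizer gone, a RuntimeError such as a model-not-found failure that still reaches this else branch is no longer rewritten into a MODEL_NOT_FOUND detail; it collapses into the same opaque 500 as any other unexpected exception, and the specific case is expected to surface earlier as ModelNotFoundError instead (see the orchestrator hunk below). A minimal sketch of the post-change fallback, mirroring only this branch; it assumes FastAPI's HTTPException, whose import sits outside the hunk:

import httpx
from fastapi import HTTPException


def fallback(exc: Exception) -> HTTPException:
    # Mirrors the post-commit else branch of translate_exception: no message
    # inspection, one generic answer for everything unmapped.
    return HTTPException(
        status_code=httpx.codes.INTERNAL_SERVER_ERROR,
        detail="Internal server error: An unexpected error occurred.",
    )


exc = fallback(RuntimeError("Model 'claude-sonnet-4-5-20250929' not found."))
assert exc.status_code == 500
assert exc.detail == "Internal server error: An unexpected error occurred."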
OpenAI responses implementation (OpenAIResponsesImpl):
@@ -363,7 +363,6 @@ class OpenAIResponsesImpl:

        # Structured outputs
        response_format = await convert_response_text_to_chat_response_format(text)

        ctx = ChatCompletionContext(
            model=model,
            messages=messages,
@ -16,6 +16,7 @@ from llama_stack_api import (
|
|||
ApprovalFilter,
|
||||
Inference,
|
||||
MCPListToolsTool,
|
||||
ModelNotFoundError,
|
||||
OpenAIAssistantMessageParam,
|
||||
OpenAIChatCompletion,
|
||||
OpenAIChatCompletionChunk,
|
||||
|
|
@ -323,6 +324,8 @@ class StreamingResponseOrchestrator:
|
|||
if last_completion_result and last_completion_result.finish_reason == "length":
|
||||
final_status = "incomplete"
|
||||
|
||||
except ModelNotFoundError:
|
||||
raise
|
||||
except Exception as exc: # noqa: BLE001
|
||||
self.final_messages = messages.copy()
|
||||
self.sequence_number += 1
|
||||
|
|
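This is the commit title's "rootcause and just throw" in action: a narrow except placed before the broad handler re-raises the one failure the API layer should report precisely, while everything else is still absorbed by the streaming error path. An illustrative, self-contained sketch of that ordering (the ModelNotFoundError stand-in below is local to the sketch; the real class comes from llama_stack_api and its base class is not shown in this diff):

class ModelNotFoundError(Exception):
    """Local stand-in for llama_stack_api.ModelNotFoundError."""


def orchestrate(step):
    try:
        return step()
    except ModelNotFoundError:
        raise  # bubble up so the server's exception translator can answer the request
    except Exception as exc:  # broad fallback, mirrors the existing handler
        return f"stream finished with error: {exc}"


def missing_model():
    raise ModelNotFoundError("model 'x' not found")


def flaky_step():
    raise ValueError("provider hiccup")


print(orchestrate(flaky_step))      # stream finished with error: provider hiccup
try:
    orchestrate(missing_model)
except ModelNotFoundError as exc:
    print("propagated:", exc)       # propagated: model 'x' not found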
Deleted file: unit tests for the runtime error sanitizer
@@ -1,24 +0,0 @@

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.core.server.runtime_error_sanitizer import sanitize_runtime_error


def test_model_not_found_is_sanitized():
    err = RuntimeError("OpenAI response failed: Model 'claude-sonnet-4-5-20250929' not found.")

    sanitized = sanitize_runtime_error(err)

    assert sanitized.code == "MODEL_NOT_FOUND"
    assert sanitized.message == "Requested model 'claude-sonnet-4-5-20250929' is unavailable."


def test_unmapped_runtime_error_defaults_to_internal_error():
    err = RuntimeError("Unexpected failure in obscure subsystem")

    sanitized = sanitize_runtime_error(err)

    assert sanitized is None