update

2025-12-03 09:53:45 +00:00 · 2025-11-13 13:09:57 -08:00 · 2025-11-13 13:09:57 -08:00 · 33c7bd6c09
commit 33c7bd6c09
parent 8d63cb405d
4 changed files with 112 additions and 6 deletions
--- a/src/llama_stack/core/server/runtime_error_sanitizer.py
+++ b/src/llama_stack/core/server/runtime_error_sanitizer.py
@ -0,0 +1,77 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 import re
 from collections.abc import Iterable
 from dataclasses import dataclass
 from llama_stack.log import get_logger
 logger = get_logger(name=__name__)
@dataclass(frozen=True)
 class RuntimeErrorRule:
    code: str
    default_message: str
    substrings: tuple[str, ...] = ()
    regex: re.Pattern[str] | None = None
    template: str | None = None
    def evaluate(self, error_msg: str) -> str | None:
        """
        Returns the sanitized message if the rule matches, otherwise None.
        """
        if self.regex:
            match = self.regex.search(error_msg)
            if match:
                if self.template:
                    try:
                        return self.template.format(**match.groupdict())
                    except Exception:  # pragma: no cover - defensive
                        logger.debug("Failed to format sanitized runtime error message", exc_info=True)
                return self.default_message
        lowered = error_msg.lower()
        if self.substrings and all(pattern in lowered for pattern in self.substrings):
            return self.default_message
        return None
@dataclass(frozen=True)
class SanitizedRuntimeError:
    """Immutable pairing of an error code with its user-safe message."""

    # Identifier of the rule that produced this result.
    code: str
    # Sanitized text associated with that code.
    message: str
 # Case-insensitive match for "model <name> not found"; the name may be wrapped
 # in single or double quotes and is captured as the "model" group.
 MODEL_NOT_FOUND_REGEX = re.compile(
    r"model ['\"]?(?P<model>[^'\" ]+)['\"]? not found",
    flags=re.IGNORECASE,
 )
 # Default rule set used by sanitize_runtime_error.
 RUNTIME_ERROR_RULES: tuple[RuntimeErrorRule, ...] = (
    RuntimeErrorRule(
        regex=MODEL_NOT_FOUND_REGEX,
        template="Requested model '{model}' is unavailable.",
        default_message="Requested model is unavailable.",
        code="MODEL_NOT_FOUND",
    ),
 )
 def sanitize_runtime_error(
    error: RuntimeError, rules: Iterable[RuntimeErrorRule] = RUNTIME_ERROR_RULES
 ) -> SanitizedRuntimeError | None:
    """
    Translate an internal RuntimeError into a stable, user-safe code/message pair.

    The error's string form is offered to each rule in iteration order; the
    first rule that yields a non-empty message determines the result.

    Args:
        error: The RuntimeError whose message should be sanitized.
        rules: Rules to consult, in priority order. Defaults to RUNTIME_ERROR_RULES.

    Returns:
        A SanitizedRuntimeError for the first matching rule, or None when no
        rule produces a message.
    """
    raw_text = str(error)
    for candidate in rules:
        if safe_text := candidate.evaluate(raw_text):
            return SanitizedRuntimeError(code=candidate.code, message=safe_text)
    return None
--- a/src/llama_stack/core/server/server.py
+++ b/src/llama_stack/core/server/server.py
@ -45,6 +45,7 @@ from llama_stack.core.request_headers import (
    user_from_scope,
 )
 from llama_stack.core.server.routes import get_all_api_routes
 from llama_stack.core.server.runtime_error_sanitizer import sanitize_runtime_error
 from llama_stack.core.stack import (
    Stack,
    cast_image_name_to_string,
@ -129,9 +130,6 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
        return HTTPException(status_code=httpx.codes.NOT_IMPLEMENTED, detail=f"Not implemented: {str(exc)}")
    elif isinstance(exc, AuthenticationRequiredError):
        return HTTPException(status_code=httpx.codes.UNAUTHORIZED, detail=f"Authentication required: {str(exc)}")
    elif isinstance(exc, RuntimeError):
        # Preserve the actual RuntimeError message for diagnosability
        return HTTPException(status_code=httpx.codes.INTERNAL_SERVER_ERROR, detail=str(exc))
    elif hasattr(exc, "status_code") and isinstance(getattr(exc, "status_code", None), int):
        # Handle provider SDK exceptions (e.g., OpenAI's APIStatusError and subclasses)
        # These include AuthenticationError (401), PermissionDeniedError (403), etc.
@ -140,9 +138,16 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
        detail = str(exc)
        return HTTPException(status_code=status_code, detail=detail)
    else:
        detail = "Internal server error: An unexpected error occurred."
        if isinstance(exc, RuntimeError):
            sanitized = sanitize_runtime_error(exc)
            if sanitized:
                logger.warning("RuntimeError sanitized as %s: %s", sanitized.code, str(exc))
                detail = f"{sanitized.code}: {sanitized.message}"
        return HTTPException(
            status_code=httpx.codes.INTERNAL_SERVER_ERROR,
-            detail="Internal server error: An unexpected error occurred.",
+            detail=detail,
        )
--- a/tests/unit/server/test_runtime_error_sanitizer.py
+++ b/tests/unit/server/test_runtime_error_sanitizer.py
@ -0,0 +1,24 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from llama_stack.core.server.runtime_error_sanitizer import sanitize_runtime_error
 def test_model_not_found_is_sanitized():
    """A "model not found" RuntimeError maps to MODEL_NOT_FOUND and keeps the model name."""
    err = RuntimeError("OpenAI response failed: Model 'claude-sonnet-4-5-20250929' not found.")
    sanitized = sanitize_runtime_error(err)
    # sanitize_runtime_error may return None; fail with a clear assertion
    # instead of an AttributeError on the attribute accesses below.
    assert sanitized is not None
    assert sanitized.code == "MODEL_NOT_FOUND"
    assert sanitized.message == "Requested model 'claude-sonnet-4-5-20250929' is unavailable."
 def test_unmapped_runtime_error_defaults_to_internal_error():
    """A RuntimeError that no rule recognizes produces no sanitized result."""
    unknown_failure = RuntimeError("Unexpected failure in obscure subsystem")
    assert sanitize_runtime_error(unknown_failure) is None
--- a/tests/unit/server/test_server.py
+++ b/tests/unit/server/test_server.py
@ -165,13 +165,13 @@ class TestTranslateException:
        assert result.detail == "Internal server error: An unexpected error occurred."
    def test_translate_runtime_error(self):
-        """Test that RuntimeError is translated to 500 HTTP status with actual error message."""
+        """Test that RuntimeError without a sanitizer rule returns generic server error."""
        exc = RuntimeError("Runtime error")
        result = translate_exception(exc)
        assert isinstance(result, HTTPException)
        assert result.status_code == 500
-        assert result.detail == "Runtime error"
+        assert result.detail == "Internal server error: An unexpected error occurred."
    def test_multiple_access_denied_scenarios(self):
        """Test various scenarios that should result in 403 status codes."""