From 359d3eeff2c2b316d7ef96bc0de18db008816f8d Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 9 Oct 2025 17:40:55 -0700
Subject: [PATCH] fix(inference): propagate 401 errors

Remote provider authentication errors (401/403) were being converted to 500
Internal Server Error, hiding the real cause from users.

Now checks if exceptions have a status_code attribute and preserves it.
This fixes authentication error handling for all remote inference providers
using OpenAI SDK (groq, openai, together, fireworks, etc.) and similar
provider SDKs.

Before:
- HTTP 500: "Internal server error: An unexpected error occurred."

After:
- HTTP 401: "Error code: 401 - Invalid API Key"

Fixes #2990

Test Plan:
1. Build stack: llama stack build --image-type venv --providers inference=remote::groq
2. Start stack: llama stack run
3. Send request with invalid API key via x-llamastack-provider-data header
4. Verify response is 401 with provider error message (not 500)
5. Repeat for openai, together, fireworks providers
---
 llama_stack/core/server/server.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llama_stack/core/server/server.py b/llama_stack/core/server/server.py
index e19092816..edc114381 100644
--- a/llama_stack/core/server/server.py
+++ b/llama_stack/core/server/server.py
@@ -138,6 +138,13 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
         return HTTPException(status_code=httpx.codes.NOT_IMPLEMENTED, detail=f"Not implemented: {str(exc)}")
     elif isinstance(exc, AuthenticationRequiredError):
         return HTTPException(status_code=httpx.codes.UNAUTHORIZED, detail=f"Authentication required: {str(exc)}")
+    elif hasattr(exc, "status_code") and isinstance(getattr(exc, "status_code", None), int):
+        # Handle provider SDK exceptions (e.g., OpenAI's APIStatusError and subclasses)
+        # These include AuthenticationError (401), PermissionDeniedError (403), etc.
+        # This preserves the actual HTTP status code from the provider
+        status_code = exc.status_code
+        detail = str(exc)
+        return HTTPException(status_code=status_code, detail=detail)
     else:
         return HTTPException(
             status_code=httpx.codes.INTERNAL_SERVER_ERROR,