LiteLLM Minor Fixes & Improvements (09/21/2024) (#5819)

* fix(router.py): fix error message

* Litellm disable keys (#5814)

* build(schema.prisma): allow blocking/unblocking keys

Fixes https://github.com/BerriAI/litellm/issues/5328

* fix(key_management_endpoints.py): fix pop

* feat(auth_checks.py): allow admin to enable/disable virtual keys

Closes https://github.com/BerriAI/litellm/issues/5328
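
For reference, a minimal sketch of how an admin might flip a key's blocked state against the proxy's management API. The `/key/block` and `/key/unblock` routes, the request shape, and the blocked column implied by the schema.prisma change are assumptions drawn from the commit messages above, not confirmed signatures:

```python
import requests

PROXY_BASE_URL = "http://localhost:4000"  # hypothetical local proxy
ADMIN_KEY = "sk-1234"  # hypothetical admin (master) key

def set_key_blocked(token: str, blocked: bool) -> dict:
    """Block or unblock a virtual key without deleting it."""
    route = "/key/block" if blocked else "/key/unblock"  # assumed routes
    resp = requests.post(
        f"{PROXY_BASE_URL}{route}",
        headers={"Authorization": f"Bearer {ADMIN_KEY}"},
        json={"key": token},  # assumed request body
    )
    resp.raise_for_status()
    return resp.json()

# e.g. disable a leaked key, then re-enable it later
set_key_blocked("sk-abc123", blocked=True)
```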

* docs(vertex.md): add auth section for vertex ai

Addresses - https://github.com/BerriAI/litellm/issues/5768#issuecomment-2365284223
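
A hedged sketch of the per-request auth flow the docs section presumably describes, using litellm's `vertex_credentials` / `vertex_project` / `vertex_location` completion parameters (the file path, project, and location values are placeholders):

```python
import json

import litellm

# Pass service-account credentials per request instead of relying on
# an ambient GOOGLE_APPLICATION_CREDENTIALS environment variable.
with open("/path/to/service_account.json") as f:
    vertex_credentials = json.dumps(json.load(f))

response = litellm.completion(
    model="vertex_ai/gemini-1.5-pro",
    messages=[{"role": "user", "content": "hello"}],
    vertex_credentials=vertex_credentials,
    vertex_project="my-gcp-project",
    vertex_location="us-central1",
)
```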

* build(model_prices_and_context_window.json): show which models support prompt_caching

Closes https://github.com/BerriAI/litellm/issues/5776
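
With the flag in the pricing map, callers can presumably check it through `litellm.get_model_info`; the exact field name (`supports_prompt_caching`) is inferred from the commit message:

```python
import litellm

info = litellm.get_model_info("claude-3-5-sonnet-20240620")
print(info.get("supports_prompt_caching", False))  # assumed field name
```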

* fix(router.py): allow setting default priority for requests
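
A minimal sketch, assuming the new setting is a Router keyword argument named `default_priority` that feeds the request scheduler unless a call overrides it:

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo"},
        }
    ],
    default_priority=0,  # assumed keyword added by this change
)
```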

* fix(router.py): add 'retry-after' header for concurrent request limit errors

Fixes https://github.com/BerriAI/litellm/issues/5783

* fix(router.py): correctly raise and use retry-after header from azure+openai

Fixes https://github.com/BerriAI/litellm/issues/5783
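
With these fixes, callers should be able to read the provider's retry-after off the mapped exception; a rough sketch (the header access pattern is assumed):

```python
import litellm

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
    )
except litellm.RateLimitError as e:
    headers = getattr(e, "headers", None) or {}
    retry_after = headers.get("retry-after")  # surfaced per the fixes above
    print(f"rate limited; retry after {retry_after}s")
```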

* fix(user_api_key_auth.py): fix valid token being none

* fix(auth_checks.py): fix model dump for cache management object

* fix(user_api_key_auth.py): pass prisma_client to obj

* test(test_otel.py): update test for new key check

* test: fix test

Krish Dholakia, 2024-09-21 18:51:53 -07:00, committed by GitHub
parent f0543a6f9d
commit f3fa2160a0
25 changed files with 1006 additions and 182 deletions
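
Every hunk below applies the same recurring pattern to the OpenAI handlers: read the status code, message text, and headers off the raised exception, fall back to the attached response object for headers, and re-raise as OpenAIError. As a standalone sketch (the helper name is illustrative, not an actual litellm function):

```python
def _extract_error_fields(e: Exception):
    """Illustrative helper mirroring the repeated pattern in the diff:
    prefer headers set on the exception itself, and fall back to the
    attached response object (e.g. an httpx.Response) when present."""
    status_code = getattr(e, "status_code", 500)
    error_text = getattr(e, "text", str(e))
    error_headers = getattr(e, "headers", None)
    error_response = getattr(e, "response", None)
    if error_headers is None and error_response is not None:
        error_headers = getattr(error_response, "headers", None)
    return status_code, error_text, error_headers
```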


@@ -704,7 +704,6 @@ class OpenAIChatCompletion(BaseLLM):
         drop_params: Optional[bool] = None,
     ):
         super().completion()
-        exception_mapping_worked = False
         try:
             if headers:
                 optional_params["extra_headers"] = headers
@@ -911,6 +910,9 @@ class OpenAIChatCompletion(BaseLLM):
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
             error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
             raise OpenAIError(
                 status_code=status_code, message=error_text, headers=error_headers
             )
@@ -1003,8 +1005,12 @@ class OpenAIChatCompletion(BaseLLM):
                 raise e
             # e.message
         except Exception as e:
+            exception_response = getattr(e, "response", None)
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
+            if error_headers is None and exception_response:
+                error_headers = getattr(exception_response, "headers", None)
+
             raise OpenAIError(
                 status_code=status_code, message=str(e), headers=error_headers
             )
@@ -1144,10 +1150,13 @@ class OpenAIChatCompletion(BaseLLM):
                 raise e
             error_headers = getattr(e, "headers", None)
             status_code = getattr(e, "status_code", 500)
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
             if response is not None and hasattr(response, "text"):
                 error_headers = getattr(e, "headers", None)
                 raise OpenAIError(
-                    status_code=500,
+                    status_code=status_code,
                     message=f"{str(e)}\n\nOriginal Response: {response.text}",  # type: ignore
                     headers=error_headers,
                 )
@@ -1272,8 +1281,12 @@ class OpenAIChatCompletion(BaseLLM):
             )
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
             raise OpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=error_text, headers=error_headers
             )
 
     def embedding(  # type: ignore
@@ -1352,8 +1365,12 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
             raise OpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=error_text, headers=error_headers
             )
 
     async def aimage_generation(
@@ -1774,7 +1791,15 @@ class OpenAITextCompletion(BaseLLM):
             ## RESPONSE OBJECT
             return TextCompletionResponse(**response_json)
         except Exception as e:
-            raise e
+            status_code = getattr(e, "status_code", 500)
+            error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
+            raise OpenAIError(
+                status_code=status_code, message=error_text, headers=error_headers
+            )
 
     async def acompletion(
         self,
@@ -1825,7 +1850,15 @@ class OpenAITextCompletion(BaseLLM):
             response_obj._hidden_params.original_response = json.dumps(response_json)
             return response_obj
         except Exception as e:
-            raise e
+            status_code = getattr(e, "status_code", 500)
+            error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
+            raise OpenAIError(
+                status_code=status_code, message=error_text, headers=error_headers
+            )
 
     def streaming(
         self,
@@ -1860,8 +1893,12 @@ class OpenAITextCompletion(BaseLLM):
         except Exception as e:
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
             raise OpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=error_text, headers=error_headers
             )
         streamwrapper = CustomStreamWrapper(
             completion_stream=response,
@@ -1871,8 +1908,19 @@ class OpenAITextCompletion(BaseLLM):
             stream_options=data.get("stream_options", None),
         )
-        for chunk in streamwrapper:
-            yield chunk
+        try:
+            for chunk in streamwrapper:
+                yield chunk
+        except Exception as e:
+            status_code = getattr(e, "status_code", 500)
+            error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
+            raise OpenAIError(
+                status_code=status_code, message=error_text, headers=error_headers
+            )
 
     async def async_streaming(
         self,
@@ -1910,8 +1958,19 @@ class OpenAITextCompletion(BaseLLM):
             stream_options=data.get("stream_options", None),
         )
-        async for transformed_chunk in streamwrapper:
-            yield transformed_chunk
+        try:
+            async for transformed_chunk in streamwrapper:
+                yield transformed_chunk
+        except Exception as e:
+            status_code = getattr(e, "status_code", 500)
+            error_headers = getattr(e, "headers", None)
+            error_text = getattr(e, "text", str(e))
+            error_response = getattr(e, "response", None)
+            if error_headers is None and error_response:
+                error_headers = getattr(error_response, "headers", None)
+            raise OpenAIError(
+                status_code=status_code, message=error_text, headers=error_headers
+            )
 
 
 class OpenAIFilesAPI(BaseLLM):