Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00
Litellm dev 01 20 2025 p1 (#7884)
* fix(initial-test-to-return-api-timeout-value-in-openai-timeout-exception): makes it easier for users to debug why a request timed out
* feat(openai.py): return the timeout value + time taken on OpenAI timeout errors, to help debug timeout errors
* fix(utils.py): fix num_retries extraction logic when num_retries = 0
* fix(config_settings.md): litellm_logging.py supports printing the payload to the console if 'LITELLM_PRINT_STANDARD_LOGGING_PAYLOAD' is true, enabling easier debugging
* test(test_auth_checks.py): remove the common-checks UserAPIKeyAuth enforcement check
* fix(litellm_logging.py): fix linting error
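The num_retries fix above is worth a note: when num_retries = 0, a `value or default`-style extraction silently discards the caller's choice, because 0 is falsy. A minimal sketch of that pitfall and the is-None check that fixes it (the function and constant names here are hypothetical, not litellm's actual code):

from typing import Optional

DEFAULT_NUM_RETRIES = 2  # hypothetical default, for illustration only


def get_num_retries_buggy(num_retries: Optional[int]) -> int:
    # BUG: `or` falls through on every falsy value, so an explicit 0
    # is silently replaced by the default and the request still retries.
    return num_retries or DEFAULT_NUM_RETRIES


def get_num_retries_fixed(num_retries: Optional[int]) -> int:
    # Substitute the default only when nothing was passed at all;
    # an explicit num_retries=0 now disables retries as intended.
    return num_retries if num_retries is not None else DEFAULT_NUM_RETRIES


assert get_num_retries_buggy(0) == 2  # wrong: the 0 was discarded
assert get_num_retries_fixed(0) == 0  # correct: the 0 is respected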
This commit is contained in:
parent 806df5d31c
commit 4b23420a20

11 changed files with 65 additions and 46 deletions
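Before the diff: the logging change gates a console dump of the standard logging payload behind an environment variable, so verbose output stays off by default. A minimal sketch of that pattern, assuming a dict payload; the helper name is hypothetical, not litellm's actual function:

import json
import os


def maybe_print_standard_logging_payload(payload: dict) -> None:
    # Enabled only when the env var is explicitly set to "true",
    # so debug output can be toggled without any code change.
    if os.getenv("LITELLM_PRINT_STANDARD_LOGGING_PAYLOAD", "").lower() == "true":
        print(json.dumps(payload, indent=2, default=str))


maybe_print_standard_logging_payload({"model": "gpt-4", "response_time_s": 1.23})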
@@ -1,4 +1,5 @@
 import hashlib
+import time
 import types
 from typing import (
     Any,
@@ -390,12 +391,14 @@ class OpenAIChatCompletion(BaseLLM):
         - call chat.completions.create.with_raw_response when litellm.return_response_headers is True
         - call chat.completions.create by default
         """
+        start_time = time.time()
         try:
             raw_response = (
                 await openai_aclient.chat.completions.with_raw_response.create(
                     **data, timeout=timeout
                 )
             )
+            end_time = time.time()

             if hasattr(raw_response, "headers"):
                 headers = dict(raw_response.headers)
@@ -403,6 +406,11 @@ class OpenAIChatCompletion(BaseLLM):
                 headers = {}
             response = raw_response.parse()
             return headers, response
+        except openai.APITimeoutError as e:
+            end_time = time.time()
+            time_delta = round(end_time - start_time, 2)
+            e.message += f" - timeout value={timeout}, time taken={time_delta} seconds"
+            raise e
         except Exception as e:
             raise e

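The two hunks above are the heart of the change: record wall-clock time around the raw-response call and, on a timeout, append both the configured limit and the elapsed time to the exception message. A standalone sketch of the same pattern, assuming an AsyncOpenAI client; the wrapper name is mine, but the timing and annotation logic mirror the diff:

import time

import openai
from openai import AsyncOpenAI


async def call_with_timeout_debug(openai_aclient: AsyncOpenAI, data: dict, timeout: float):
    start_time = time.time()
    try:
        # The raw response keeps the HTTP headers accessible alongside the body.
        raw_response = await openai_aclient.chat.completions.with_raw_response.create(
            **data, timeout=timeout
        )
        headers = dict(raw_response.headers) if hasattr(raw_response, "headers") else {}
        return headers, raw_response.parse()
    except openai.APITimeoutError as e:
        # Annotate the error with how long we actually waited vs. the limit,
        # so "why did this time out?" is answerable from the message alone.
        time_delta = round(time.time() - start_time, 2)
        e.message += f" - timeout value={timeout}, time taken={time_delta} seconds"
        raise e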
@@ -521,6 +529,7 @@ class OpenAIChatCompletion(BaseLLM):
             for _ in range(
                 2
             ):  # if call fails due to alternating messages, retry with reformatted message
+
                 if provider_config is not None:
                     data = provider_config.transform_request(
                         model=model,
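This hunk and the next both sit inside the same two-attempt loop: call the provider once, and if the conversation shape is rejected, reformat the messages and try exactly once more. A generic sketch of that control flow; the reformat step and the exception type are placeholders, not litellm's actual ones:

from typing import Callable


def call_with_one_reformat_retry(
    call: Callable[[list], dict],
    messages: list,
    reformat: Callable[[list], list],
) -> dict:
    # At most two attempts: the original messages, then a reformatted copy.
    for attempt in range(2):
        try:
            return call(messages)
        except ValueError:  # placeholder for a provider "bad message order" error
            if attempt == 1:
                raise  # already retried with reformatted messages; give up
            messages = reformat(messages)
    raise AssertionError("unreachable")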
@@ -725,6 +734,7 @@ class OpenAIChatCompletion(BaseLLM):
             for _ in range(
                 2
             ):  # if call fails due to alternating messages, retry with reformatted message
+
                 try:
                     openai_aclient: AsyncOpenAI = self._get_openai_client(  # type: ignore
                         is_async=True,
@@ -792,9 +802,10 @@ class OpenAIChatCompletion(BaseLLM):
             error_headers = getattr(e, "headers", None)
             if error_headers is None and exception_response:
                 error_headers = getattr(exception_response, "headers", None)
+            message = getattr(e, "message", str(e))

             raise OpenAIError(
-                status_code=status_code, message=str(e), headers=error_headers
+                status_code=status_code, message=message, headers=error_headers
             )

     def streaming(
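The last hunk stops stringifying the exception wholesale and instead prefers its structured message attribute, which now carries the timeout annotation added above; headers are pulled from the exception or, failing that, from the attached response. The same defensive-getattr pattern in isolation (the function name is mine):

from typing import Optional, Tuple


def extract_error_details(e: Exception, exception_response=None) -> Tuple[str, Optional[dict]]:
    # Prefer headers on the exception itself, then on the attached response.
    error_headers = getattr(e, "headers", None)
    if error_headers is None and exception_response is not None:
        error_headers = getattr(exception_response, "headers", None)
    # Prefer the exception's own message (e.g. the timeout annotation above)
    # over a generic str(e) dump.
    message = getattr(e, "message", str(e))
    return message, error_headers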