fix(router.py): add more deployment timeout debug information for timeout errors (#8523)

* fix(router.py): add more deployment timeout debug information for timeout errors

  Helps understand why some calls under high traffic don't respect their model-specific timeouts.

* test(test_convert_dict_to_response.py): unit test ensuring an empty str is not converted to None

  Addresses https://github.com/BerriAI/litellm/issues/8507

* fix(convert_dict_to_response.py): handle empty message str - don't return it as 'None'

  Fixes https://github.com/BerriAI/litellm/issues/8507

* test(test_completion.py): add e2e test
2025-04-25 18:54:30 +00:00 · 2025-02-13 17:10:22 -08:00 · 2025-02-13 17:10:22 -08:00 · c399232b2c
commit c399232b2c
parent b70981b8fb
5 changed files with 234 additions and 60 deletions
--- a/litellm/router.py
+++ b/litellm/router.py
@ -573,20 +573,32 @@ class Router:
            litellm.amoderation, call_type="moderation"
        )

-
    def discard(self):
        """
        Pseudo-destructor to be invoked to clean up global data structures when router is no longer used.
        For now, unhook router's callbacks from all lists
        """
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm._async_success_callback, self)
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm.success_callback, self)
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm._async_failure_callback, self)
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm.failure_callback, self)
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm.input_callback, self)
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm.service_callback, self)
-        litellm.logging_callback_manager.remove_callback_from_list_by_object(litellm.callbacks, self)
-
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm._async_success_callback, self
+        )
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm.success_callback, self
+        )
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm._async_failure_callback, self
+        )
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm.failure_callback, self
+        )
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm.input_callback, self
+        )
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm.service_callback, self
+        )
+        litellm.logging_callback_manager.remove_callback_from_list_by_object(
+            litellm.callbacks, self
+        )

    def _update_redis_cache(self, cache: RedisCache):
        """
@ -602,7 +614,6 @@ class Router:
        if self.cache.redis_cache is None:
            self.cache.redis_cache = cache

-
    def initialize_assistants_endpoint(self):
        ## INITIALIZE PASS THROUGH ASSISTANTS ENDPOINT ##
        self.acreate_assistants = self.factory_function(litellm.acreate_assistants)
@ -902,6 +913,9 @@ class Router:
        - in the semaphore,  make a check against it's local rpm before running
        """
        model_name = None
+        _timeout_debug_deployment_dict = (
+            {}
+        )  # this is a temporary dict to debug timeout issues
        try:
            verbose_router_logger.debug(
                f"Inside _acompletion()- model: {model}; kwargs: {kwargs}"
@ -914,6 +928,7 @@ class Router:
                specific_deployment=kwargs.pop("specific_deployment", None),
                request_kwargs=kwargs,
            )
+            _timeout_debug_deployment_dict = deployment
            end_time = time.time()
            _duration = end_time - start_time
            asyncio.create_task(
@ -1009,6 +1024,15 @@ class Router:
            )

            return response
+        except litellm.Timeout as e:
+            deployment_request_timeout_param = _timeout_debug_deployment_dict.get(
+                "litellm_params", {}
+            ).get("request_timeout", None)
+            deployment_timeout_param = _timeout_debug_deployment_dict.get(
+                "litellm_params", {}
+            ).get("timeout", None)
+            e.message += f"\n\nDeployment Info: request_timeout: {deployment_request_timeout_param}\ntimeout: {deployment_timeout_param}"
+            raise e
        except Exception as e:
            verbose_router_logger.info(
                f"litellm.acompletion(model={model_name})\033[31m Exception {str(e)}\033[0m"
@ -3307,6 +3331,7 @@ class Router:
        _num_healthy_deployments = 0
        if healthy_deployments is not None and isinstance(healthy_deployments, list):
            _num_healthy_deployments = len(healthy_deployments)
+
        _num_all_deployments = 0
        if all_deployments is not None and isinstance(all_deployments, list):
            _num_all_deployments = len(all_deployments)