fix(router.py): improve error message returned for fallbacks

Krrish Dholakia 2024-06-25 11:26:56 -07:00
parent 2bd993039b
commit cccc55213b
3 changed files with 85 additions and 63 deletions


@@ -45,7 +45,7 @@ litellm_settings:
   request_timeout: 120
   allowed_fails: 3
   # fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
-  context_window_fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
+  # context_window_fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
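Note: the yaml keys toggled above map to the Router's fallback settings. A minimal sketch of the equivalent Python-side configuration, assuming the Router constructor accepts these keyword arguments as the proxy config implies; the deployment params are placeholders:

    # Sketch only: model names and deployment params are illustrative.
    from litellm import Router

    router = Router(
        model_list=[
            {"model_name": "summarize", "litellm_params": {"model": "gpt-3.5-turbo"}},
            {"model_name": "summarize-l", "litellm_params": {"model": "gpt-3.5-turbo-16k"}},
            {"model_name": "summarize-xl", "litellm_params": {"model": "gpt-4-turbo"}},
        ],
        # generic fallbacks, tried on any retryable failure
        fallbacks=[{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}],
        # tried only when the deployment raises ContextWindowExceededError
        context_window_fallbacks=[{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}],
    )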


@@ -2175,73 +2175,93 @@ class Router:
                 )
             ):  # don't retry a malformed request
                 raise e
-            if (
-                isinstance(e, litellm.ContextWindowExceededError)
-                and context_window_fallbacks is not None
-            ):
-                fallback_model_group = None
-                for (
-                    item
-                ) in context_window_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
-                    if list(item.keys())[0] == model_group:
-                        fallback_model_group = item[model_group]
-                        break
-                if fallback_model_group is None:
-                    raise original_exception
-                for mg in fallback_model_group:
-                    """
-                    Iterate through the model groups and try calling that deployment
-                    """
-                    try:
-                        kwargs["model"] = mg
-                        kwargs.setdefault("metadata", {}).update(
-                            {"model_group": mg}
-                        )  # update model_group used, if fallbacks are done
-                        response = await self.async_function_with_retries(
-                            *args, **kwargs
-                        )
-                        verbose_router_logger.info(
-                            "Successful fallback b/w models."
-                        )
-                        return response
-                    except Exception as e:
-                        pass
-            elif (
-                isinstance(e, litellm.ContentPolicyViolationError)
-                and content_policy_fallbacks is not None
-            ):
-                fallback_model_group = None
-                for (
-                    item
-                ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
-                    if list(item.keys())[0] == model_group:
-                        fallback_model_group = item[model_group]
-                        break
-                if fallback_model_group is None:
-                    raise original_exception
-                for mg in fallback_model_group:
-                    """
-                    Iterate through the model groups and try calling that deployment
-                    """
-                    try:
-                        kwargs["model"] = mg
-                        kwargs.setdefault("metadata", {}).update(
-                            {"model_group": mg}
-                        )  # update model_group used, if fallbacks are done
-                        response = await self.async_function_with_retries(
-                            *args, **kwargs
-                        )
-                        verbose_router_logger.info(
-                            "Successful fallback b/w models."
-                        )
-                        return response
-                    except Exception as e:
-                        pass
-            elif fallbacks is not None:
+            if isinstance(e, litellm.ContextWindowExceededError):
+                if context_window_fallbacks is not None:
+                    fallback_model_group = None
+                    for (
+                        item
+                    ) in context_window_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                        if list(item.keys())[0] == model_group:
+                            fallback_model_group = item[model_group]
+                            break
+
+                    if fallback_model_group is None:
+                        raise original_exception
+
+                    for mg in fallback_model_group:
+                        """
+                        Iterate through the model groups and try calling that deployment
+                        """
+                        try:
+                            kwargs["model"] = mg
+                            kwargs.setdefault("metadata", {}).update(
+                                {"model_group": mg}
+                            )  # update model_group used, if fallbacks are done
+                            response = await self.async_function_with_retries(
+                                *args, **kwargs
+                            )
+                            verbose_router_logger.info(
+                                "Successful fallback b/w models."
+                            )
+                            return response
+                        except Exception as e:
+                            pass
+                else:
+                    error_message = "model={}. context_window_fallbacks={}. fallbacks={}.\n\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
+                        model_group, context_window_fallbacks, fallbacks
+                    )
+                    verbose_router_logger.info(
+                        msg="Got 'ContextWindowExceededError'. No context_window_fallback set. Defaulting \
+                            to fallbacks, if available.{}".format(
+                            error_message
+                        )
+                    )
+
+                    e.message += "\n{}".format(error_message)
+            elif isinstance(e, litellm.ContentPolicyViolationError):
+                if content_policy_fallbacks is not None:
+                    fallback_model_group = None
+                    for (
+                        item
+                    ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                        if list(item.keys())[0] == model_group:
+                            fallback_model_group = item[model_group]
+                            break
+
+                    if fallback_model_group is None:
+                        raise original_exception
+
+                    for mg in fallback_model_group:
+                        """
+                        Iterate through the model groups and try calling that deployment
+                        """
+                        try:
+                            kwargs["model"] = mg
+                            kwargs.setdefault("metadata", {}).update(
+                                {"model_group": mg}
+                            )  # update model_group used, if fallbacks are done
+                            response = await self.async_function_with_retries(
+                                *args, **kwargs
+                            )
+                            verbose_router_logger.info(
+                                "Successful fallback b/w models."
+                            )
+                            return response
+                        except Exception as e:
+                            pass
+                else:
+                    error_message = "model={}. content_policy_fallback={}. fallbacks={}.\n\nSet 'content_policy_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
+                        model_group, content_policy_fallbacks, fallbacks
+                    )
+                    verbose_router_logger.info(
+                        msg="Got 'ContentPolicyViolationError'. No content_policy_fallback set. Defaulting \
+                            to fallbacks, if available.{}".format(
+                            error_message
+                        )
+                    )
+
+                    e.message += "\n{}".format(error_message)
+            if fallbacks is not None:
                 verbose_router_logger.debug(f"inside model fallbacks: {fallbacks}")
                 generic_fallback_idx: Optional[int] = None
                 ## check for specific model group-specific fallbacks
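Previously the router only handled ContextWindowExceededError and ContentPolicyViolationError when the matching fallback list was configured, so with no such list the exception fell through without any hint about why no fallback ran. The new code branches on the exception type alone: when the specific fallback list is missing it logs the situation, appends a pointer to the routing docs onto the exception message, and then falls through to the generic `fallbacks` handling (note the final `elif` becoming `if`). A standalone sketch of the text that now gets attached, using the same format string as the diff; the values are illustrative:

    # Reproduces the error_message built in the new else-branch; values are illustrative.
    model_group = "summarize"
    context_window_fallbacks = None
    fallbacks = [{"summarize": ["summarize-l", "summarize-xl"]}]

    error_message = "model={}. context_window_fallbacks={}. fallbacks={}.\n\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
        model_group, context_window_fallbacks, fallbacks
    )
    print(error_message)
    # model=summarize. context_window_fallbacks=None. fallbacks=[{'summarize': ['summarize-l', 'summarize-xl']}].
    #
    # Set 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks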


@@ -1129,7 +1129,9 @@ async def test_router_content_policy_fallbacks(
         mock_response = Exception("content filtering policy")
     else:
         mock_response = litellm.ModelResponse(
-            choices=[litellm.Choices(finish_reason="content_filter")]
+            choices=[litellm.Choices(finish_reason="content_filter")],
+            model="gpt-3.5-turbo",
+            usage=litellm.Usage(prompt_tokens=10, completion_tokens=0, total_tokens=10),
         )
     router = Router(
         model_list=[
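The mocked ModelResponse in the test now also carries a model name and a Usage block alongside the content_filter finish reason, so it looks like a complete completion object. A hedged sketch of wiring such a mock into a router deployment for a fallback test, assuming mock_response is honored inside litellm_params as this test file does; the model and group names are placeholders:

    # Sketch: mock a content-filtered response on one deployment and declare its fallback.
    import litellm
    from litellm import Router

    mock_response = litellm.ModelResponse(
        choices=[litellm.Choices(finish_reason="content_filter")],
        model="gpt-3.5-turbo",
        usage=litellm.Usage(prompt_tokens=10, completion_tokens=0, total_tokens=10),
    )

    router = Router(
        model_list=[
            {
                "model_name": "my-model",  # placeholder group name
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "mock_response": mock_response,  # assumed testing hook
                },
            }
        ],
        content_policy_fallbacks=[{"my-model": ["my-fallback-model"]}],
    )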