LiteLLM Minor Fixes & Improvements (12/27/2024) - p1 (#7448)

* feat(main.py): support 'litellm.ContextWindowExceededError' as a mock_response value

enables quicker debugging of router, fallback, and proxy behavior on context window errors
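
For example, a minimal sketch of how the mock can be used to exercise context-window handling without calling a real model (the model name and messages below are illustrative):

    import litellm

    # Passing the special string as mock_response makes completion() raise the
    # mapped exception instead of contacting any provider.
    try:
        litellm.completion(
            model="gpt-3.5-turbo",  # illustrative
            messages=[{"role": "user", "content": "hey"}],
            mock_response="litellm.ContextWindowExceededError",
        )
    except litellm.ContextWindowExceededError as e:
        print("caught:", e)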

* feat(exception_mapping_utils.py): extract special litellm errors from the error string when `litellm_proxy/` is the provider

Closes https://github.com/BerriAI/litellm/issues/7259
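
Illustrative only (the proxy `api_base` and key below are placeholders): with this change, a call routed through a LiteLLM proxy using the `litellm_proxy/` provider should surface the original typed error instead of a generic exception:

    import litellm

    try:
        litellm.completion(
            model="litellm_proxy/gpt-3.5-turbo",  # call a LiteLLM proxy as the provider
            api_base="http://localhost:4000",     # placeholder proxy URL
            api_key="sk-1234",                    # placeholder key
            messages=[{"role": "user", "content": "a very long prompt ..."}],
        )
    except litellm.ContextWindowExceededError:
        # the special litellm error is now parsed out of the proxy's error string
        # and re-raised, instead of arriving as a plain BadRequestError
        pass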

* fix(user_api_key_auth.py): mark the 'Received Proxy Server Request' span as SpanKind.SERVER

Closes https://github.com/BerriAI/litellm/issues/7298
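
For context, a minimal OpenTelemetry snippet (standard OTel Python API, nothing LiteLLM-specific) showing what "span kind server" means:

    from opentelemetry import trace
    from opentelemetry.trace import SpanKind

    tracer = trace.get_tracer("example")

    # SpanKind.SERVER marks the span as the entry point for an incoming request,
    # which is how trace backends pick the root span of a request trace.
    with tracer.start_as_current_span("Received Proxy Server Request", kind=SpanKind.SERVER):
        pass  # handle the request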
Krish Dholakia, 2024-12-27 19:04:39 -08:00 (committed by GitHub)
commit 67b39bacf7, parent cca9cfe667
7 changed files with 83 additions and 5 deletions


@@ -337,20 +337,22 @@ class ContextWindowExceededError(BadRequestError):  # type: ignore
         litellm_debug_info: Optional[str] = None,
     ):
         self.status_code = 400
-        self.message = "litellm.ContextWindowExceededError: {}".format(message)
         self.model = model
         self.llm_provider = llm_provider
         self.litellm_debug_info = litellm_debug_info
         request = httpx.Request(method="POST", url="https://api.openai.com/v1")
         self.response = httpx.Response(status_code=400, request=request)
         super().__init__(
-            message=self.message,
+            message=message,
             model=self.model,  # type: ignore
             llm_provider=self.llm_provider,  # type: ignore
             response=self.response,
             litellm_debug_info=self.litellm_debug_info,
         )  # Call the base class constructor with the parameters it needs
+
+        # set after, to make it clear the raised error is a context window exceeded error
+        self.message = "litellm.ContextWindowExceededError: {}".format(self.message)

     def __str__(self):
         _message = self.message
         if self.num_retries:
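
A quick sketch of the resulting behavior (assuming the patched litellm is importable): because the prefix is now applied after the base-class constructor runs, the final message carries both class prefixes, which is the message shape the new proxy-extraction test below relies on:

    import litellm

    try:
        raise litellm.ContextWindowExceededError(
            message="too many tokens",
            model="gpt-3.5-turbo",
            llm_provider="openai",
        )
    except litellm.ContextWindowExceededError as e:
        # the base class prepends its own prefix first, then this class adds its name on top
        assert str(e).startswith("litellm.ContextWindowExceededError: litellm.BadRequestError:")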


@@ -84,6 +84,7 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.sdk.resources import Resource
         from opentelemetry.sdk.trace import TracerProvider
+        from opentelemetry.trace import SpanKind

         if config is None:
             config = OpenTelemetryConfig.from_env()
@@ -99,6 +100,8 @@ class OpenTelemetry(CustomLogger):
         trace.set_tracer_provider(provider)
         self.tracer = trace.get_tracer(LITELLM_TRACER_NAME)

+        self.span_kind = SpanKind
+
         _debug_otel = str(os.getenv("DEBUG_OTEL", "False")).lower()

         if _debug_otel == "true":
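
The point of `self.span_kind = SpanKind` is that code holding the logger instance can reference span kinds without importing OpenTelemetry itself. A rough sketch, assuming `otel_logger` is an already-initialized `OpenTelemetry` instance:

    from opentelemetry.trace import SpanKind

    # otel_logger.span_kind is the SpanKind enum itself, so the two forms are equivalent
    assert otel_logger.span_kind is SpanKind
    span = otel_logger.tracer.start_span("demo", kind=otel_logger.span_kind.SERVER)
    span.end()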

exception_mapping_utils.py:

@@ -1,6 +1,6 @@
 import json
 import traceback
-from typing import Optional
+from typing import Any, Optional

 import httpx
@@ -84,6 +84,41 @@ def _get_response_headers(original_exception: Exception) -> Optional[httpx.Headers]:
     return _response_headers


+import re
+
+
+def extract_and_raise_litellm_exception(
+    response: Optional[Any],
+    error_str: str,
+    model: str,
+    custom_llm_provider: str,
+):
+    """
+    Covers scenario where litellm sdk calling proxy.
+
+    Enables raising the special errors raised by litellm, eg. ContextWindowExceededError.
+
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/7259
+    """
+    pattern = r"litellm\.\w+Error"
+
+    # Search for the exception in the error string
+    match = re.search(pattern, error_str)
+
+    # Extract the exception if found
+    if match:
+        exception_name = match.group(0)
+        exception_name = exception_name.strip().replace("litellm.", "")
+        raised_exception_obj = getattr(litellm, exception_name, None)
+        if raised_exception_obj:
+            raise raised_exception_obj(
+                message=error_str,
+                llm_provider=custom_llm_provider,
+                model=model,
+                response=response,
+            )
+
+
 def exception_type(  # type: ignore  # noqa: PLR0915
     model,
     original_exception,
@@ -197,6 +232,15 @@ def exception_type(  # type: ignore  # noqa: PLR0915
                 litellm_debug_info=extra_information,
             )

+        if (
+            custom_llm_provider == "litellm_proxy"
+        ):  # handle special case where calling litellm proxy + exception str contains error message
+            extract_and_raise_litellm_exception(
+                response=getattr(original_exception, "response", None),
+                error_str=error_str,
+                model=model,
+                custom_llm_provider=custom_llm_provider,
+            )
         if (
             custom_llm_provider == "openai"
             or custom_llm_provider == "text-completion-openai"
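
A standalone demonstration of the extraction pattern (plain `re`, independent of litellm), using an error string shaped like the one in the new test:

    import re

    error_str = (
        "Error code: 400 - {'error': {'message': \"litellm.ContextWindowExceededError: "
        "litellm.BadRequestError: this is a mock context window exceeded error\", "
        "'type': None, 'param': None, 'code': '400'}}"
    )

    match = re.search(r"litellm\.\w+Error", error_str)
    assert match is not None
    print(match.group(0))                          # litellm.ContextWindowExceededError
    print(match.group(0).replace("litellm.", ""))  # name passed to getattr(litellm, ...)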

main.py:

@@ -550,6 +550,17 @@ def _handle_mock_potential_exceptions(
             ),  # type: ignore
             model=model,
         )
+    elif (
+        isinstance(mock_response, str)
+        and mock_response == "litellm.ContextWindowExceededError"
+    ):
+        raise litellm.ContextWindowExceededError(
+            message="this is a mock context window exceeded error",
+            llm_provider=getattr(
+                mock_response, "llm_provider", custom_llm_provider or "openai"
+            ),  # type: ignore
+            model=model,
+        )
     elif (
         isinstance(mock_response, str)
         and mock_response == "litellm.InternalServerError"
@@ -734,7 +745,7 @@ def mock_completion(
     except Exception as e:
         if isinstance(e, openai.APIError):
             raise e
-        raise Exception("Mock completion response failed")
+        raise Exception("Mock completion response failed - {}".format(e))


 @client
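
Combined with the router, the new mock string makes context-window fallbacks testable offline. A rough, untested sketch (model names are illustrative; it assumes `mock_response` passes through `litellm_params` and that `context_window_fallbacks` is configured on the Router, both existing litellm features):

    from litellm import Router

    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "mock_response": "litellm.ContextWindowExceededError",
                },
            },
            {
                "model_name": "gpt-4-32k",
                "litellm_params": {"model": "gpt-4-32k", "mock_response": "fallback worked"},
            },
        ],
        context_window_fallbacks=[{"gpt-3.5-turbo": ["gpt-4-32k"]}],
    )

    # the first deployment raises the mocked ContextWindowExceededError,
    # so the router should fall back to gpt-4-32k and return the mocked text
    resp = router.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
    )
    print(resp.choices[0].message.content)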


@@ -14,4 +14,4 @@ model_list:
 router_settings:
   routing_strategy: usage-based-routing-v2
   disable_cooldowns: True

user_api_key_auth.py:

@@ -281,12 +281,14 @@ async def user_api_key_auth(  # noqa: PLR0915
         )

         if open_telemetry_logger is not None:
             parent_otel_span = open_telemetry_logger.tracer.start_span(
                 name="Received Proxy Server Request",
                 start_time=_to_ns(start_time),
                 context=open_telemetry_logger.get_traceparent_from_header(
                     headers=request.headers
                 ),
+                kind=open_telemetry_logger.span_kind.SERVER,
             )

     ### USER-DEFINED AUTH FUNCTION ###
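
Marking this span as SERVER (rather than OpenTelemetry's default INTERNAL) tells trace backends that it is the entry point of the incoming request, so the proxy request shows up as the root of the trace instead of a nested child span.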


@@ -1189,3 +1189,19 @@ def test_exceptions_base_class():
         assert isinstance(e, litellm.RateLimitError)
         assert e.code == "429"
         assert e.type == "throttling_error"
+
+
+def test_context_window_exceeded_error_from_litellm_proxy():
+    from httpx import Response
+
+    from litellm.litellm_core_utils.exception_mapping_utils import (
+        extract_and_raise_litellm_exception,
+    )
+
+    args = {
+        "response": Response(status_code=400, text="Bad Request"),
+        "error_str": "Error code: 400 - {'error': {'message': \"litellm.ContextWindowExceededError: litellm.BadRequestError: this is a mock context window exceeded error\\nmodel=gpt-3.5-turbo. context_window_fallbacks=None. fallbacks=None.\\n\\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks\\nReceived Model Group=gpt-3.5-turbo\\nAvailable Model Group Fallbacks=None\", 'type': None, 'param': None, 'code': '400'}}",
+        "model": "gpt-3.5-turbo",
+        "custom_llm_provider": "litellm_proxy",
+    }
+    with pytest.raises(litellm.ContextWindowExceededError):
+        extract_and_raise_litellm_exception(**args)
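
Note how this test ties the changes in this commit together: the `error_str` is shaped like what a proxy returns when the main.py mock fires behind it ("this is a mock context window exceeded error"), the exception class supplies the "litellm.ContextWindowExceededError: litellm.BadRequestError: ..." prefix ordering, and the new extraction helper maps that proxy error string back to a typed ContextWindowExceededError on the SDK side.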