diff --git a/litellm/exceptions.py b/litellm/exceptions.py
index 9109735a38..c26928a656 100644
--- a/litellm/exceptions.py
+++ b/litellm/exceptions.py
@@ -337,20 +337,22 @@ class ContextWindowExceededError(BadRequestError):  # type: ignore
         litellm_debug_info: Optional[str] = None,
     ):
         self.status_code = 400
-        self.message = "litellm.ContextWindowExceededError: {}".format(message)
         self.model = model
         self.llm_provider = llm_provider
         self.litellm_debug_info = litellm_debug_info
         request = httpx.Request(method="POST", url="https://api.openai.com/v1")
         self.response = httpx.Response(status_code=400, request=request)
         super().__init__(
-            message=self.message,
+            message=message,
             model=self.model,  # type: ignore
             llm_provider=self.llm_provider,  # type: ignore
             response=self.response,
             litellm_debug_info=self.litellm_debug_info,
         )  # Call the base class constructor with the parameters it needs

+        # set after super().__init__(), to make it clear the raised error is a context window exceeded error
+        self.message = "litellm.ContextWindowExceededError: {}".format(self.message)
+
     def __str__(self):
         _message = self.message
         if self.num_retries:
diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py
index 03c892ce5b..b1e93927d6 100644
--- a/litellm/integrations/opentelemetry.py
+++ b/litellm/integrations/opentelemetry.py
@@ -84,6 +84,7 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.sdk.resources import Resource
         from opentelemetry.sdk.trace import TracerProvider
+        from opentelemetry.trace import SpanKind

         if config is None:
             config = OpenTelemetryConfig.from_env()
@@ -99,6 +100,8 @@ class OpenTelemetry(CustomLogger):
         trace.set_tracer_provider(provider)
         self.tracer = trace.get_tracer(LITELLM_TRACER_NAME)

+        self.span_kind = SpanKind
+
         _debug_otel = str(os.getenv("DEBUG_OTEL", "False")).lower()

         if _debug_otel == "true":
diff --git a/litellm/litellm_core_utils/exception_mapping_utils.py b/litellm/litellm_core_utils/exception_mapping_utils.py
index 32d47368ed..3d898fe15b 100644
--- a/litellm/litellm_core_utils/exception_mapping_utils.py
+++ b/litellm/litellm_core_utils/exception_mapping_utils.py
@@ -1,6 +1,6 @@
 import json
 import traceback
-from typing import Optional
+from typing import Any, Optional

 import httpx

@@ -84,6 +84,41 @@ def _get_response_headers(original_exception: Exception) -> Optional[httpx.Heade
     return _response_headers


+import re
+
+
+def extract_and_raise_litellm_exception(
+    response: Optional[Any],
+    error_str: str,
+    model: str,
+    custom_llm_provider: str,
+):
+    """
+    Covers the scenario where the litellm SDK is calling a litellm proxy.
+
+    Enables raising the special errors raised by litellm, e.g. ContextWindowExceededError.
+
+    Relevant Issue: https://github.com/BerriAI/litellm/issues/7259
+    """
+    pattern = r"litellm\.\w+Error"
+
+    # Search for a litellm exception name in the error string
+    match = re.search(pattern, error_str)
+
+    # Extract and raise the matching litellm exception, if found
+    if match:
+        exception_name = match.group(0)
+        exception_name = exception_name.strip().replace("litellm.", "")
+        raised_exception_obj = getattr(litellm, exception_name, None)
+        if raised_exception_obj:
+            raise raised_exception_obj(
+                message=error_str,
+                llm_provider=custom_llm_provider,
+                model=model,
+                response=response,
+            )
+
+
 def exception_type(  # type: ignore  # noqa: PLR0915
     model,
     original_exception,
@@ -197,6 +232,15 @@
                     litellm_debug_info=extra_information,
                 )

+            if (
+                custom_llm_provider == "litellm_proxy"
+            ):  # handle the special case where the SDK is calling a litellm proxy and the exception string contains the error message
+                extract_and_raise_litellm_exception(
+                    response=getattr(original_exception, "response", None),
+                    error_str=error_str,
+                    model=model,
+                    custom_llm_provider=custom_llm_provider,
+                )
             if (
                 custom_llm_provider == "openai"
                 or custom_llm_provider == "text-completion-openai"
diff --git a/litellm/main.py b/litellm/main.py
index a59a206872..39a9873cf7 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -550,6 +550,17 @@ def _handle_mock_potential_exceptions(
             ),  # type: ignore
             model=model,
         )
+    elif (
+        isinstance(mock_response, str)
+        and mock_response == "litellm.ContextWindowExceededError"
+    ):
+        raise litellm.ContextWindowExceededError(
+            message="this is a mock context window exceeded error",
+            llm_provider=getattr(
+                mock_response, "llm_provider", custom_llm_provider or "openai"
+            ),  # type: ignore
+            model=model,
+        )
     elif (
         isinstance(mock_response, str)
         and mock_response == "litellm.InternalServerError"
@@ -734,7 +745,7 @@ def mock_completion(
     except Exception as e:
         if isinstance(e, openai.APIError):
             raise e
-        raise Exception("Mock completion response failed")
+        raise Exception("Mock completion response failed - {}".format(e))


 @client
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 55d62eb0ab..9c37d84f8a 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -14,4 +14,4 @@ model_list:

 router_settings:
   routing_strategy: usage-based-routing-v2
-  disable_cooldowns: True
\ No newline at end of file
+  disable_cooldowns: True
diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py
index 9bec7884e9..1f127e370b 100644
--- a/litellm/proxy/auth/user_api_key_auth.py
+++ b/litellm/proxy/auth/user_api_key_auth.py
@@ -281,12 +281,14 @@ async def user_api_key_auth(  # noqa: PLR0915
         )

         if open_telemetry_logger is not None:
+
             parent_otel_span = open_telemetry_logger.tracer.start_span(
                 name="Received Proxy Server Request",
                 start_time=_to_ns(start_time),
                 context=open_telemetry_logger.get_traceparent_from_header(
                     headers=request.headers
                 ),
+                kind=open_telemetry_logger.span_kind.SERVER,
             )

         ### USER-DEFINED AUTH FUNCTION ###
diff --git a/tests/local_testing/test_exceptions.py b/tests/local_testing/test_exceptions.py
index bca86c488b..0b4f828054 100644
--- a/tests/local_testing/test_exceptions.py
+++ b/tests/local_testing/test_exceptions.py
@@ -1189,3 +1189,19 @@ def test_exceptions_base_class():
         assert isinstance(e, litellm.RateLimitError)
         assert e.code == "429"
         assert e.type == "throttling_error"
+
+
+def test_context_window_exceeded_error_from_litellm_proxy():
+    from httpx import Response
+    from litellm.litellm_core_utils.exception_mapping_utils import (
+        extract_and_raise_litellm_exception,
+    )
+
+    args = {
+        "response": Response(status_code=400, text="Bad Request"),
+        "error_str": "Error code: 400 - {'error': {'message': \"litellm.ContextWindowExceededError: litellm.BadRequestError: this is a mock context window exceeded error\\nmodel=gpt-3.5-turbo. context_window_fallbacks=None. fallbacks=None.\\n\\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks\\nReceived Model Group=gpt-3.5-turbo\\nAvailable Model Group Fallbacks=None\", 'type': None, 'param': None, 'code': '400'}}",
+        "model": "gpt-3.5-turbo",
+        "custom_llm_provider": "litellm_proxy",
+    }
+    with pytest.raises(litellm.ContextWindowExceededError):
+        extract_and_raise_litellm_exception(**args)
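
For context, a minimal standalone sketch (not part of the patch) of the mapping performed by the new extract_and_raise_litellm_exception helper: the proxy's error string is scanned for a "litellm.<Name>Error" token, which is resolved back to the matching exception class exposed on the litellm module and re-raised. The error_str value below is an abbreviated, hypothetical example of a proxy error payload, not a real response.

import re

import litellm

# Abbreviated, hypothetical error string of the kind a litellm proxy returns.
error_str = (
    "Error code: 400 - {'error': {'message': "
    "'litellm.ContextWindowExceededError: this is a mock context window exceeded error'}}"
)

# Same pattern the helper uses: find the first "litellm.<Something>Error" token.
match = re.search(r"litellm\.\w+Error", error_str)
if match:
    exception_name = match.group(0).replace("litellm.", "")
    exception_cls = getattr(litellm, exception_name, None)
    if exception_cls is not None:
        # The helper raises this class with the full error string as its message,
        # so SDK callers can catch litellm.ContextWindowExceededError directly.
        print(exception_cls.__name__)  # ContextWindowExceededError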