fix(proxy/utils.py): support logging rejected requests to langfuse, etc.

Krrish Dholakia 2024-07-05 14:39:35 -07:00
parent d528b66db0
commit b1b21b0340
4 changed files with 36 additions and 52 deletions

File: litellm/litellm_core_utils/litellm_logging.py

@@ -153,11 +153,6 @@ class Logging:
         langfuse_secret=None,
         langfuse_host=None,
     ):
-        if call_type not in [item.value for item in CallTypes]:
-            allowed_values = ", ".join([item.value for item in CallTypes])
-            raise ValueError(
-                f"Invalid call_type {call_type}. Allowed values: {allowed_values}"
-            )
         if messages is not None:
             if isinstance(messages, str):
                 messages = [
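
Context for this removal: the rejected-request path added in proxy/utils.py (below) calls litellm.utils.function_setup with original_function="IGNORE_THIS", a synthetic call type that is deliberately not a member of CallTypes, so this strict guard had to go. A minimal sketch of what the dropped check did, using an illustrative subset of the real enum from litellm.types.utils:

from enum import Enum


class CallTypes(Enum):
    # Illustrative subset; the real enum in litellm.types.utils has many more members.
    completion = "completion"
    acompletion = "acompletion"
    embedding = "embedding"


call_type = "IGNORE_THIS"  # synthetic value used for rejected requests
allowed_values = ", ".join(item.value for item in CallTypes)
if call_type not in [item.value for item in CallTypes]:
    # This is where the old guard raised ValueError, blocking failure logging:
    print(f"Invalid call_type {call_type}. Allowed values: {allowed_values}")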

File: litellm/proxy/utils.py

@@ -49,6 +49,7 @@ from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler,
 )
+from litellm.types.utils import CallTypes
 
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
@@ -354,35 +355,6 @@ class ProxyLogging:
             print_verbose(f"final data being sent to {call_type} call: {data}")
             return data
         except Exception as e:
-            if "litellm_logging_obj" in data:
-                logging_obj: litellm.litellm_core_utils.litellm_logging.Logging = data[
-                    "litellm_logging_obj"
-                ]
-
-                ## ASYNC FAILURE HANDLER ##
-                error_message = ""
-                if isinstance(e, HTTPException):
-                    if isinstance(e.detail, str):
-                        error_message = e.detail
-                    elif isinstance(e.detail, dict):
-                        error_message = json.dumps(e.detail)
-                    else:
-                        error_message = str(e)
-                else:
-                    error_message = str(e)
-                error_raised = Exception(f"{error_message}")
-                await logging_obj.async_failure_handler(
-                    exception=error_raised,
-                    traceback_exception=traceback.format_exc(),
-                )
-
-                ## SYNC FAILURE HANDLER ##
-                try:
-                    logging_obj.failure_handler(
-                        error_raised, traceback.format_exc()
-                    )  # DO NOT MAKE THREADED - router retry fallback relies on this!
-                except Exception as error_val:
-                    pass
             raise e
 
     async def during_call_hook(
@@ -597,12 +569,14 @@ class ProxyLogging:
         )
 
         ### LOGGING ###
+        litellm_logging_obj: Optional[Logging] = request_data.get(
+            "litellm_logging_obj", None
+        )
 
         if isinstance(original_exception, HTTPException):
-            litellm_logging_obj: Optional[Logging] = request_data.get(
-                "litellm_logging_obj", None
-            )
             if litellm_logging_obj is None:
+                import uuid
+
+                request_data["litellm_call_id"] = str(uuid.uuid4())
                 litellm_logging_obj, data = litellm.utils.function_setup(
                     original_function="IGNORE_THIS",
                     rules_obj=litellm.utils.Rules(),
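
Together with the deleted except-block above, this moves rejected-request logging onto a single path: if a hook raised before a Logging object was ever attached to request_data, the failure hook now mints a fresh litellm_call_id and builds one via function_setup, so failure callbacks (Langfuse, etc.) still fire. A runnable sketch of that lazy-construction pattern, with make_logging_obj as a hypothetical stand-in for litellm.utils.function_setup:

import uuid


def make_logging_obj(call_id: str) -> dict:
    # Hypothetical stand-in for litellm.utils.function_setup(original_function="IGNORE_THIS", ...)
    return {"litellm_call_id": call_id, "events": []}


def on_failure(request_data: dict, exc: Exception) -> None:
    logging_obj = request_data.get("litellm_logging_obj", None)
    if logging_obj is None:
        # Request was rejected before setup ran: mint a call id and build the
        # logging object now, so the failure can still reach the callbacks.
        request_data["litellm_call_id"] = str(uuid.uuid4())
        logging_obj = make_logging_obj(request_data["litellm_call_id"])
        request_data["litellm_logging_obj"] = logging_obj
    logging_obj["events"].append(("failure", str(exc)))


request = {"model": "fake-model"}
on_failure(request, Exception("Max parallel request limit reached"))
print(request["litellm_logging_obj"]["events"])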

File: (pytest test file; path not shown)

@@ -23,6 +23,8 @@ import os
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
+from typing import Literal
+
 import pytest
 from fastapi import Request, Response
 from starlette.datastructures import URL
@@ -51,7 +53,20 @@ class testLogger(CustomLogger):
     def __init__(self):
         self.reaches_failure_event = False
 
-    async def async_pre_call_check(self, deployment: dict):
+    async def async_pre_call_hook(
+        self,
+        user_api_key_dict: UserAPIKeyAuth,
+        cache: DualCache,
+        data: dict,
+        call_type: Literal[
+            "completion",
+            "text_completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
+    ):
         raise HTTPException(
             status_code=429, detail={"error": "Max parallel request limit reached"}
         )
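
The test logger now implements async_pre_call_hook, the proxy-level CustomLogger hook that runs before routing; raising an HTTPException from it is exactly the rejection path this commit teaches the proxy to log. A self-contained sketch of driving such a hook (RejectAllLogger is illustrative, not the test's class; only fastapi is required):

import asyncio

from fastapi import HTTPException


class RejectAllLogger:
    # Illustrative logger: always rejects, like the test's hook above.
    async def async_pre_call_hook(self, user_api_key_dict, cache, data, call_type):
        raise HTTPException(
            status_code=429, detail={"error": "Max parallel request limit reached"}
        )


async def main():
    logger = RejectAllLogger()
    try:
        await logger.async_pre_call_hook(None, None, {"model": "fake-model"}, "completion")
    except HTTPException as e:
        # This is the exception the proxy's failure hook now ships to Langfuse etc.
        print(e.status_code, e.detail)


asyncio.run(main())
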
@@ -92,15 +107,15 @@ router = Router(
                 ],
             },
         ),
-        # ("/v1/completions", {"model": "fake-model", "prompt": "ping"}),
-        # (
-        #     "/v1/embeddings",
-        #     {
-        #         "input": "The food was delicious and the waiter...",
-        #         "model": "text-embedding-ada-002",
-        #         "encoding_format": "float",
-        #     },
-        # ),
+        ("/v1/completions", {"model": "fake-model", "prompt": "ping"}),
+        (
+            "/v1/embeddings",
+            {
+                "input": "The food was delicious and the waiter...",
+                "model": "text-embedding-ada-002",
+                "encoding_format": "float",
+            },
+        ),
     ],
 )
 @pytest.mark.asyncio
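
Un-commenting these entries extends the same rejection test from chat completions to the text-completion and embeddings routes. For reference, a minimal self-contained analogue of the parametrize pattern used here:

import pytest


@pytest.mark.parametrize(
    "route, request_data",
    [
        ("/v1/completions", {"model": "fake-model", "prompt": "ping"}),
        (
            "/v1/embeddings",
            {"input": "ping", "model": "text-embedding-ada-002"},
        ),
    ],
)
def test_payload_has_model(route, request_data):
    # Each (route, payload) pair becomes its own test case.
    assert route.startswith("/v1/")
    assert "model" in request_data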

File: litellm/utils.py

@@ -531,6 +531,8 @@ def function_setup(
             call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
         ):
             messages = kwargs.get("input", "speech")
+        else:
+            messages = "default-message-value"
         stream = True if "stream" in kwargs and kwargs["stream"] == True else False
         logging_obj = litellm.litellm_core_utils.litellm_logging.Logging(
             model=model,
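
The new else branch backs the rejected-request path above: a synthetic call_type like "IGNORE_THIS" matches none of the existing branches, so without a fallback the Logging constructor would not receive a usable messages value. A toy reduction of the branch ladder (most branches omitted):

def pick_messages(call_type: str, kwargs: dict):
    # Toy reduction of function_setup's call_type -> messages mapping.
    if call_type in ("completion", "acompletion"):
        return kwargs.get("messages", [])
    elif call_type in ("speech", "aspeech"):
        return kwargs.get("input", "speech")
    else:
        # New fallback: synthetic call types (e.g. "IGNORE_THIS") still get a value.
        return "default-message-value"


print(pick_messages("IGNORE_THIS", {}))  # -> default-message-value
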
@@ -561,10 +563,8 @@ def function_setup(
         )
         return logging_obj, kwargs
     except Exception as e:
-        import logging
-
-        logging.debug(
-            f"[Non-Blocking] {traceback.format_exc()}; args - {args}; kwargs - {kwargs}"
+        verbose_logger.error(
+            f"litellm.utils.py::function_setup() - [Non-Blocking] {traceback.format_exc()}; args - {args}; kwargs - {kwargs}"
         )
         raise e
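
Switching from a root-logger logging.debug to verbose_logger.error makes setup failures visible at default log levels while the exception is still re-raised. A sketch of the difference, assuming verbose_logger is litellm's package-level logger (the logger name here is illustrative):

import logging

# Assumption: litellm exposes a named package logger; "LiteLLM" is illustrative.
verbose_logger = logging.getLogger("LiteLLM")
logging.basicConfig(level=logging.WARNING)

try:
    raise ValueError("bad kwargs passed to function_setup")
except Exception:
    # logging.debug() on the root logger is invisible at default levels; ERROR on a
    # named logger surfaces the problem while the caller still re-raises.
    verbose_logger.error(
        "litellm.utils.py::function_setup() - [Non-Blocking] setup failed", exc_info=True
    )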