From b1b21b03404bc74497b84d7f1e02249a1d1da28a Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 5 Jul 2024 14:39:35 -0700
Subject: [PATCH] fix(proxy/utils.py): support logging rejected requests to
 langfuse, etc.

---
 litellm/litellm_core_utils/litellm_logging.py |  5 ---
 litellm/proxy/utils.py                        | 40 ++++---------------
 litellm/tests/test_proxy_reject_logging.py    | 35 +++++++++++-----
 litellm/utils.py                              |  8 ++--
 4 files changed, 36 insertions(+), 52 deletions(-)

diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 4edbce5e1..c0fdd3090 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -153,11 +153,6 @@ class Logging:
         langfuse_secret=None,
         langfuse_host=None,
     ):
-        if call_type not in [item.value for item in CallTypes]:
-            allowed_values = ", ".join([item.value for item in CallTypes])
-            raise ValueError(
-                f"Invalid call_type {call_type}. Allowed values: {allowed_values}"
-            )
         if messages is not None:
             if isinstance(messages, str):
                 messages = [
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index f09251107..0e0294bbe 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -49,6 +49,7 @@ from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler,
 )
+from litellm.types.utils import CallTypes

 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
@@ -354,35 +355,6 @@ class ProxyLogging:
             print_verbose(f"final data being sent to {call_type} call: {data}")
             return data
         except Exception as e:
-            if "litellm_logging_obj" in data:
-                logging_obj: litellm.litellm_core_utils.litellm_logging.Logging = data[
-                    "litellm_logging_obj"
-                ]
-
-                ## ASYNC FAILURE HANDLER ##
-                error_message = ""
-                if isinstance(e, HTTPException):
-                    if isinstance(e.detail, str):
-                        error_message = e.detail
-                    elif isinstance(e.detail, dict):
-                        error_message = json.dumps(e.detail)
-                    else:
-                        error_message = str(e)
-                else:
-                    error_message = str(e)
-                error_raised = Exception(f"{error_message}")
-                await logging_obj.async_failure_handler(
-                    exception=error_raised,
-                    traceback_exception=traceback.format_exc(),
-                )
-
-                ## SYNC FAILURE HANDLER ##
-                try:
-                    logging_obj.failure_handler(
-                        error_raised, traceback.format_exc()
-                    )  # DO NOT MAKE THREADED - router retry fallback relies on this!
-                except Exception as error_val:
-                    pass
             raise e

     async def during_call_hook(
@@ -597,12 +569,14 @@ class ProxyLogging:
         )

         ### LOGGING ###
-        litellm_logging_obj: Optional[Logging] = request_data.get(
-            "litellm_logging_obj", None
-        )
-        if isinstance(original_exception, HTTPException):
+        litellm_logging_obj: Optional[Logging] = request_data.get(
+            "litellm_logging_obj", None
+        )
         if litellm_logging_obj is None:
+            import uuid
+
+            request_data["litellm_call_id"] = str(uuid.uuid4())
             litellm_logging_obj, data = litellm.utils.function_setup(
                 original_function="IGNORE_THIS",
                 rules_obj=litellm.utils.Rules(),
diff --git a/litellm/tests/test_proxy_reject_logging.py b/litellm/tests/test_proxy_reject_logging.py
index 7edd70381..f176b2f8c 100644
--- a/litellm/tests/test_proxy_reject_logging.py
+++ b/litellm/tests/test_proxy_reject_logging.py
@@ -23,6 +23,8 @@ import os
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
+from typing import Literal
+
 import pytest
 from fastapi import Request, Response
 from starlette.datastructures import URL
@@ -51,7 +53,20 @@ class testLogger(CustomLogger):
     def __init__(self):
         self.reaches_failure_event = False

-    async def async_pre_call_check(self, deployment: dict):
+    async def async_pre_call_hook(
+        self,
+        user_api_key_dict: UserAPIKeyAuth,
+        cache: DualCache,
+        data: dict,
+        call_type: Literal[
+            "completion",
+            "text_completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
+        ],
+    ):
         raise HTTPException(
             status_code=429, detail={"error": "Max parallel request limit reached"}
         )
@@ -92,15 +107,15 @@ router = Router(
             ],
         },
     ),
-    # ("/v1/completions", {"model": "fake-model", "prompt": "ping"}),
-    # (
-    #     "/v1/embeddings",
-    #     {
-    #         "input": "The food was delicious and the waiter...",
-    #         "model": "text-embedding-ada-002",
-    #         "encoding_format": "float",
-    #     },
-    # ),
+    ("/v1/completions", {"model": "fake-model", "prompt": "ping"}),
+    (
+        "/v1/embeddings",
+        {
+            "input": "The food was delicious and the waiter...",
+            "model": "text-embedding-ada-002",
+            "encoding_format": "float",
+        },
+    ),
 ],
 )
 @pytest.mark.asyncio
diff --git a/litellm/utils.py b/litellm/utils.py
index 490b809a1..1010beb96 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -531,6 +531,8 @@ def function_setup(
         call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
     ):
         messages = kwargs.get("input", "speech")
+    else:
+        messages = "default-message-value"
     stream = True if "stream" in kwargs and kwargs["stream"] == True else False
     logging_obj = litellm.litellm_core_utils.litellm_logging.Logging(
         model=model,
@@ -561,10 +563,8 @@ def function_setup(
         )
         return logging_obj, kwargs
     except Exception as e:
-        import logging
-
-        logging.debug(
-            f"[Non-Blocking] {traceback.format_exc()}; args - {args}; kwargs - {kwargs}"
+        verbose_logger.error(
+            f"litellm.utils.py::function_setup() - [Non-Blocking] {traceback.format_exc()}; args - {args}; kwargs - {kwargs}"
         )
         raise e
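
Note: the updated test asserts that a request rejected by a pre-call hook (the 429 raised in testLogger.async_pre_call_hook) still reaches the failure callbacks, which is what makes rejected requests visible in Langfuse and similar loggers. Below is a minimal sketch of the same callback pattern outside the proxy. It assumes failure callbacks also fire for locally raised errors; TrackFailureLogger and the deliberately invalid model name are hypothetical, while CustomLogger, async_log_failure_event, and litellm.callbacks are existing litellm APIs.

    import asyncio

    import litellm
    from litellm.integrations.custom_logger import CustomLogger


    class TrackFailureLogger(CustomLogger):
        """Hypothetical logger that records whether the failure event fired."""

        def __init__(self):
            self.reaches_failure_event = False

        async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
            # Record that litellm invoked the failure callback for this request.
            self.reaches_failure_event = True


    async def main():
        logger = TrackFailureLogger()
        litellm.callbacks = [logger]
        try:
            await litellm.acompletion(
                model="not-a-real-provider/not-a-real-model",  # fails before any provider call
                messages=[{"role": "user", "content": "ping"}],
            )
        except Exception:
            pass  # the rejection itself is expected
        await asyncio.sleep(1)  # failure callbacks run asynchronously
        assert logger.reaches_failure_event


    asyncio.run(main())

The proxy-side analogue is post_call_failure_hook in litellm/proxy/utils.py, which this patch extends to build a logging object (with a fresh litellm_call_id) whenever one does not yet exist, so the failure event above is emitted even for requests rejected inside proxy hooks.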