From 2eee7f978f04d10bd7e8699424a03043bcdadc9c Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Thu, 30 Jan 2025 22:18:53 -0800
Subject: [PATCH] Litellm dev 01 30 2025 p2 (#8134)

* feat(lowest_tpm_rpm_v2.py): fix redis cache check to use >= instead of >, making it consistent

* test(test_custom_guardrails.py): add more unit testing on default-on guardrails; ensure they run even if the user-sent guardrail list is empty

* docs(quick_start.md): clarify that default-on guardrails run even if the user's guardrails list contains other guardrails

* refactor(litellm_logging.py): refactor the no-log check into a helper util, allowing for more consistent behavior

* feat(litellm_logging.py): add event hook to verbose logs

* fix(litellm_logging.py): add unit testing to ensure `litellm.global_disable_no_log_param` is respected

* docs(logging.md): document how to disable the 'no-log' param

* test: fix test to handle February

* test: clean up old bedrock model

* fix: fix router check
---
 .../docs/proxy/guardrails/quick_start.md      |  2 +
 docs/my-website/docs/proxy/logging.md         | 10 ++++
 litellm/__init__.py                           | 10 ++--
 litellm/litellm_core_utils/litellm_logging.py | 53 ++++++++++++-------
 litellm/proxy/_new_secret_config.yaml         |  9 +++-
 litellm/router.py                             |  4 +-
 litellm/router_strategy/lowest_tpm_rpm_v2.py  |  6 +--
 .../test_bedrock_completion.py                |  2 +-
 .../test_custom_callback_input.py             | 29 ++++++++++
 .../test_custom_guardrail.py                  |  8 +++
 10 files changed, 103 insertions(+), 30 deletions(-)

diff --git a/docs/my-website/docs/proxy/guardrails/quick_start.md b/docs/my-website/docs/proxy/guardrails/quick_start.md
index b565d13178..35f720bf7e 100644
--- a/docs/my-website/docs/proxy/guardrails/quick_start.md
+++ b/docs/my-website/docs/proxy/guardrails/quick_start.md
@@ -125,6 +125,8 @@ curl -i http://localhost:4000/v1/chat/completions \
 
 Set `default_on: true` in your guardrail config to run the guardrail on every request. This is useful if you want to run a guardrail on every request without the user having to specify it.
 
+**Note:** These will run even if the user specifies a different guardrail or an empty guardrails array.
+
 ```yaml
 guardrails:
   - guardrail_name: "aporia-pre-guard"
diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md
index 6992057a22..0c9dd23f56 100644
--- a/docs/my-website/docs/proxy/logging.md
+++ b/docs/my-website/docs/proxy/logging.md
@@ -116,6 +116,16 @@ Removes any field with `user_api_key_*` from metadata.
 
 For some use cases, you may want to turn off all tracking/logging. You can do this by passing `no-log=True` in the request body.
 
+:::info
+
+Disable this by setting `global_disable_no_log_param: true` in your config.yaml file.
+
+```yaml
+litellm_settings:
+  global_disable_no_log_param: true
+```
+:::
+
diff --git a/litellm/__init__.py b/litellm/__init__.py
index bccab529ff..76ab021a0a 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -51,11 +51,12 @@ if set_verbose == True:
     _turn_on_debug()
 ###############################################
 ### Callbacks /Logging / Success / Failure Handlers #####
+CALLBACK_TYPES = Union[str, Callable, CustomLogger]
+input_callback: List[CALLBACK_TYPES] = []
+success_callback: List[CALLBACK_TYPES] = []
+failure_callback: List[CALLBACK_TYPES] = []
+service_callback: List[CALLBACK_TYPES] = []
 logging_callback_manager = LoggingCallbackManager()
-input_callback: List[Union[str, Callable, CustomLogger]] = []
-success_callback: List[Union[str, Callable, CustomLogger]] = []
-failure_callback: List[Union[str, Callable, CustomLogger]] = []
-service_callback: List[Union[str, Callable, CustomLogger]] = []
 _custom_logger_compatible_callbacks_literal = Literal[
     "lago",
     "openmeter",
@@ -1276,3 +1277,4 @@ custom_provider_map: List[CustomLLMItem] = []
 _custom_providers: List[str] = (
     []
 )  # internal helper util, used to track names of custom providers
+global_disable_no_log_param: bool = False
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index e988381ff4..7c5638c945 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -867,6 +867,26 @@ class Logging(LiteLLMLoggingBaseClass):
 
         return None
 
+    def should_run_callback(
+        self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str
+    ) -> bool:
+
+        if litellm.global_disable_no_log_param:
+            return True
+
+        if litellm_params.get("no-log", False) is True:
+            # proxy cost tracking callbacks should still run
+
+            if not (
+                isinstance(callback, CustomLogger)
+                and "_PROXY_" in callback.__class__.__name__
+            ):
+                verbose_logger.debug(
+                    f"no-log request, skipping logging for {event_hook} event"
+                )
+                return False
+        return True
+
     def _success_handler_helper_fn(
         self,
         result=None,
@@ -1072,14 +1092,13 @@ class Logging(LiteLLMLoggingBaseClass):
         for callback in callbacks:
             try:
                 litellm_params = self.model_call_details.get("litellm_params", {})
-                if litellm_params.get("no-log", False) is True:
-                    # proxy cost tracking cal backs should run
-                    if not (
-                        isinstance(callback, CustomLogger)
-                        and "_PROXY_" in callback.__class__.__name__
-                    ):
-                        verbose_logger.info("no-log request, skipping logging")
-                        continue
+                should_run = self.should_run_callback(
+                    callback=callback,
+                    litellm_params=litellm_params,
+                    event_hook="success_handler",
+                )
+                if not should_run:
+                    continue
                 if callback == "promptlayer" and promptLayerLogger is not None:
                     print_verbose("reaches promptlayer for logging!")
                     promptLayerLogger.log_event(
@@ -1626,18 +1645,14 @@ class Logging(LiteLLMLoggingBaseClass):
         for callback in callbacks:
             # check if callback can run for this request
             litellm_params = self.model_call_details.get("litellm_params", {})
-            if litellm_params.get("no-log", False) is True:
-                # proxy cost tracking cal backs should run
-                if not (
-                    isinstance(callback, CustomLogger)
-                    and "_PROXY_" in callback.__class__.__name__
-                ):
-                    print_verbose("no-log request, skipping logging")
-                    continue
+            should_run = self.should_run_callback(
+                callback=callback,
+                litellm_params=litellm_params,
+                event_hook="async_success_handler",
+            )
+            if not should_run:
+                continue
             try:
-                if kwargs.get("no-log", False) is True:
-                    print_verbose("no-log request, skipping logging")
-                    continue
                 if callback == "openmeter" and openMeterLogger is not None:
                     if self.stream is True:
                         if (
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 321e8b676f..ddf14718c9 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,7 +1,8 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: gpt-3.5-turbo-testing
     litellm_params:
       model: gpt-3.5-turbo
+      rpm: 3
   - model_name: anthropic-claude
     litellm_params:
       model: claude-3-5-haiku-20241022
@@ -13,4 +14,8 @@ model_list:
   - model_name: deepseek/*
     litellm_params:
       model: deepseek/*
-      api_key: os.environ/DEEPSEEK_API_KEY
\ No newline at end of file
+      api_key: os.environ/DEEPSEEK_API_KEY
+
+litellm_settings:
+  callbacks: ["langsmith"]
+  global_disable_no_log_param: true
\ No newline at end of file
diff --git a/litellm/router.py b/litellm/router.py
index fb3250367b..faa7cd4a6b 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -3112,7 +3112,6 @@ class Router:
                 deployment_num_retries, int
             ):
                 num_retries = deployment_num_retries
-
             """
             Retry Logic
             """
@@ -3149,6 +3148,9 @@ class Router:
                     else:
                         raise
 
+            verbose_router_logger.info(
+                f"Retrying request with num_retries: {num_retries}"
+            )
             # decides how long to sleep before retry
             retry_after = self._time_to_sleep_before_retry(
                 e=original_exception,
diff --git a/litellm/router_strategy/lowest_tpm_rpm_v2.py b/litellm/router_strategy/lowest_tpm_rpm_v2.py
index d372814cbc..64f086036b 100644
--- a/litellm/router_strategy/lowest_tpm_rpm_v2.py
+++ b/litellm/router_strategy/lowest_tpm_rpm_v2.py
@@ -145,7 +145,6 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
         Raises - RateLimitError if deployment over defined RPM limit
         """
         try:
-
             # ------------
             # Setup values
             # ------------
@@ -183,6 +182,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                     headers={"retry-after": str(60)},  # type: ignore
                     request=httpx.Request(method="tpm_rpm_limits", url="https://github.com/BerriAI/litellm"),  # type: ignore
                 ),
+                num_retries=deployment.get("num_retries"),
             )
         else:  # if local result below limit, check redis ## prevent unnecessary redis checks
@@ -209,8 +209,8 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                         headers={"retry-after": str(60)},  # type: ignore
                         request=httpx.Request(method="tpm_rpm_limits", url="https://github.com/BerriAI/litellm"),  # type: ignore
                     ),
+                    num_retries=deployment.get("num_retries"),
                 )
-
             return deployment
         except Exception as e:
             if isinstance(e, litellm.RateLimitError):
@@ -540,7 +540,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                 "rpm_limit": _deployment_rpm,
             }
             raise litellm.RateLimitError(
-                message=f"{RouterErrors.no_deployments_available.value}. 12345 Passed model={model_group}. Deployments={deployment_dict}",
+                message=f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}",
                 llm_provider="",
                 model=model_group,
                 response=httpx.Response(
diff --git a/tests/llm_translation/test_bedrock_completion.py b/tests/llm_translation/test_bedrock_completion.py
index e78f8d141c..dd59415443 100644
--- a/tests/llm_translation/test_bedrock_completion.py
+++ b/tests/llm_translation/test_bedrock_completion.py
@@ -733,7 +733,7 @@ def test_bedrock_stop_value(stop, model):
     "model",
     [
         "anthropic.claude-3-sonnet-20240229-v1:0",
-        "meta.llama3-70b-instruct-v1:0",
+        # "meta.llama3-70b-instruct-v1:0",
         "anthropic.claude-v2",
         "mistral.mixtral-8x7b-instruct-v0:1",
    ],
diff --git a/tests/local_testing/test_custom_callback_input.py b/tests/local_testing/test_custom_callback_input.py
index 9630896a52..8343b63c9d 100644
--- a/tests/local_testing/test_custom_callback_input.py
+++ b/tests/local_testing/test_custom_callback_input.py
@@ -1683,3 +1683,32 @@ def test_standard_logging_retries():
             "standard_logging_object"
         ]["trace_id"]
     )
+
+
+@pytest.mark.parametrize("disable_no_log_param", [True, False])
+def test_litellm_logging_no_log_param(monkeypatch, disable_no_log_param):
+    monkeypatch.setattr(litellm, "global_disable_no_log_param", disable_no_log_param)
+    from litellm.litellm_core_utils.litellm_logging import Logging
+
+    litellm.success_callback = ["langfuse"]
+    litellm_call_id = "my-unique-call-id"
+    litellm_logging_obj = Logging(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "hi"}],
+        stream=False,
+        call_type="acompletion",
+        litellm_call_id=litellm_call_id,
+        start_time=datetime.now(),
+        function_id="1234",
+    )
+
+    should_run = litellm_logging_obj.should_run_callback(
+        callback="langfuse",
+        litellm_params={"no-log": True},
+        event_hook="success_handler",
+    )
+
+    if disable_no_log_param:
+        assert should_run is True
+    else:
+        assert should_run is False
diff --git a/tests/logging_callback_tests/test_custom_guardrail.py b/tests/logging_callback_tests/test_custom_guardrail.py
index 5a80154735..ae8b8c0c1d 100644
--- a/tests/logging_callback_tests/test_custom_guardrail.py
+++ b/tests/logging_callback_tests/test_custom_guardrail.py
@@ -228,3 +228,11 @@ def test_default_on_guardrail():
         )
         == True
     )
+
+    assert (
+        guardrail.should_run_guardrail(
+            {"metadata": {"guardrails": []}},
+            GuardrailEventHooks.pre_call,
+        )
+        == True
+    )
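
Reviewer note (not part of the patch): the new `Logging.should_run_callback` helper centralizes the no-log gate. Its decision order is: the `litellm.global_disable_no_log_param` kill switch forces logging on; otherwise, when the request sets `no-log`, only proxy cost-tracking callbacks (`CustomLogger` subclasses with `_PROXY_` in the class name) still run. A minimal usage sketch, mirroring the unit test added above:

```python
# Illustrative sketch only. Constructor arguments are copied from
# test_litellm_logging_no_log_param in this patch.
from datetime import datetime

import litellm
from litellm.litellm_core_utils.litellm_logging import Logging

logging_obj = Logging(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    stream=False,
    call_type="acompletion",
    litellm_call_id="example-call-id",
    start_time=datetime.now(),
    function_id="1234",
)

# Request sent with `no-log`: a regular callback such as "langfuse" is skipped.
assert logging_obj.should_run_callback(
    callback="langfuse",
    litellm_params={"no-log": True},
    event_hook="success_handler",
) is False

# With the new global setting enabled, `no-log` is ignored and logging runs.
litellm.global_disable_no_log_param = True
assert logging_obj.should_run_callback(
    callback="langfuse",
    litellm_params={"no-log": True},
    event_hook="success_handler",
) is True
```

On the proxy, the same switch is exposed through config.yaml (`litellm_settings: global_disable_no_log_param: true`), as the logging.md change above documents.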