From 2eee7f978f04d10bd7e8699424a03043bcdadc9c Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Thu, 30 Jan 2025 22:18:53 -0800
Subject: [PATCH] Litellm dev 01 30 2025 p2 (#8134)

* feat(lowest_tpm_rpm_v2.py): fix redis cache check to use >= instead of >, making it consistent

* test(test_custom_guardrails.py): add more unit testing on default-on guardrails; ensure they run even if the user-sent guardrail list is empty

* docs(quick_start.md): clarify that default-on guardrails run even if the user's guardrails list contains other guardrails

* refactor(litellm_logging.py): refactor the no-log check into a helper util, allowing for more consistent behavior

* feat(litellm_logging.py): add event hook to verbose logs

* fix(litellm_logging.py): add unit testing to ensure `litellm.global_disable_no_log_param` is respected

* docs(logging.md): document how to disable the 'no-log' param

* test: fix test to handle February

* test: clean up old bedrock model

* fix: fix router check
---
 .../docs/proxy/guardrails/quick_start.md      |  2 +
 docs/my-website/docs/proxy/logging.md         | 10 ++++
 litellm/__init__.py                           | 10 ++--
 litellm/litellm_core_utils/litellm_logging.py | 53 ++++++++++++-------
 litellm/proxy/_new_secret_config.yaml         |  9 +++-
 litellm/router.py                             |  4 +-
 litellm/router_strategy/lowest_tpm_rpm_v2.py  |  6 +--
 .../test_bedrock_completion.py                |  2 +-
 .../test_custom_callback_input.py             | 29 ++++++++++
 .../test_custom_guardrail.py                  |  8 +++
 10 files changed, 103 insertions(+), 30 deletions(-)

diff --git a/docs/my-website/docs/proxy/guardrails/quick_start.md b/docs/my-website/docs/proxy/guardrails/quick_start.md
index b565d13178..35f720bf7e 100644
--- a/docs/my-website/docs/proxy/guardrails/quick_start.md
+++ b/docs/my-website/docs/proxy/guardrails/quick_start.md
@@ -125,6 +125,8 @@ curl -i http://localhost:4000/v1/chat/completions \
 
 Set `default_on: true` in your guardrail config to run the guardrail on every request. This is useful if you want to run a guardrail on every request without the user having to specify it.
 
+**Note:** These will run even if the user specifies a different guardrail or an empty guardrails array.
+
 ```yaml
 guardrails:
   - guardrail_name: "aporia-pre-guard"
diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md
index 6992057a22..0c9dd23f56 100644
--- a/docs/my-website/docs/proxy/logging.md
+++ b/docs/my-website/docs/proxy/logging.md
@@ -116,6 +116,16 @@ Removes any field with `user_api_key_*` from metadata.
 
 For some use cases, you may want to turn off all tracking/logging. You can do this by passing `no-log=True` in the request body.
 
+:::info
+
+Disable this by setting `global_disable_no_log_param: true` in your config.yaml file.
+
+```yaml
+litellm_settings:
+  global_disable_no_log_param: true
+```
+:::
+
diff --git a/litellm/__init__.py b/litellm/__init__.py
index bccab529ff..76ab021a0a 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -51,11 +51,12 @@ if set_verbose == True:
     _turn_on_debug()
 ###############################################
 ### Callbacks /Logging / Success / Failure Handlers #####
+CALLBACK_TYPES = Union[str, Callable, CustomLogger]
+input_callback: List[CALLBACK_TYPES] = []
+success_callback: List[CALLBACK_TYPES] = []
+failure_callback: List[CALLBACK_TYPES] = []
+service_callback: List[CALLBACK_TYPES] = []
 logging_callback_manager = LoggingCallbackManager()
-input_callback: List[Union[str, Callable, CustomLogger]] = []
-success_callback: List[Union[str, Callable, CustomLogger]] = []
-failure_callback: List[Union[str, Callable, CustomLogger]] = []
-service_callback: List[Union[str, Callable, CustomLogger]] = []
 _custom_logger_compatible_callbacks_literal = Literal[
     "lago",
     "openmeter",
@@ -1276,3 +1277,4 @@ custom_provider_map: List[CustomLLMItem] = []
 _custom_providers: List[str] = (
     []
 )  # internal helper util, used to track names of custom providers
+global_disable_no_log_param: bool = False
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index e988381ff4..7c5638c945 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -867,6 +867,26 @@ class Logging(LiteLLMLoggingBaseClass):
 
         return None
 
+    def should_run_callback(
+        self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str
+    ) -> bool:
+
+        if litellm.global_disable_no_log_param:
+            return True
+
+        if litellm_params.get("no-log", False) is True:
+            # proxy cost tracking callbacks should still run
+
+            if not (
+                isinstance(callback, CustomLogger)
+                and "_PROXY_" in callback.__class__.__name__
+            ):
+                verbose_logger.debug(
+                    f"no-log request, skipping logging for {event_hook} event"
+                )
+                return False
+        return True
+
     def _success_handler_helper_fn(
         self,
         result=None,
@@ -1072,14 +1092,13 @@ class Logging(LiteLLMLoggingBaseClass):
         for callback in callbacks:
             try:
                 litellm_params = self.model_call_details.get("litellm_params", {})
-                if litellm_params.get("no-log", False) is True:
-                    # proxy cost tracking cal backs should run
-                    if not (
-                        isinstance(callback, CustomLogger)
-                        and "_PROXY_" in callback.__class__.__name__
-                    ):
-                        verbose_logger.info("no-log request, skipping logging")
-                        continue
+                should_run = self.should_run_callback(
+                    callback=callback,
+                    litellm_params=litellm_params,
+                    event_hook="success_handler",
+                )
+                if not should_run:
+                    continue
                 if callback == "promptlayer" and promptLayerLogger is not None:
                     print_verbose("reaches promptlayer for logging!")
                     promptLayerLogger.log_event(
@@ -1626,18 +1645,14 @@ class Logging(LiteLLMLoggingBaseClass):
         for callback in callbacks:
             # check if callback can run for this request
             litellm_params = self.model_call_details.get("litellm_params", {})
-            if litellm_params.get("no-log", False) is True:
-                # proxy cost tracking cal backs should run
-                if not (
-                    isinstance(callback, CustomLogger)
-                    and "_PROXY_" in callback.__class__.__name__
-                ):
-                    print_verbose("no-log request, skipping logging")
-                    continue
+            should_run = self.should_run_callback(
+                callback=callback,
+                litellm_params=litellm_params,
+                event_hook="async_success_handler",
+            )
+            if not should_run:
+                continue
             try:
-                if kwargs.get("no-log", False) is True:
-                    print_verbose("no-log request, skipping logging")
-                    continue
                 if callback == "openmeter" and openMeterLogger is not None:
                     if self.stream is True:
                         if (
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 321e8b676f..ddf14718c9 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,7 +1,8 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: gpt-3.5-turbo-testing
     litellm_params:
       model: gpt-3.5-turbo
+      rpm: 3
   - model_name: anthropic-claude
     litellm_params:
       model: claude-3-5-haiku-20241022
@@ -13,4 +14,8 @@ model_list:
   - model_name: deepseek/*
     litellm_params:
       model: deepseek/*
-      api_key: os.environ/DEEPSEEK_API_KEY
\ No newline at end of file
+      api_key: os.environ/DEEPSEEK_API_KEY
+
+litellm_settings:
+  callbacks: ["langsmith"]
+  global_disable_no_log_param: true
\ No newline at end of file
diff --git a/litellm/router.py b/litellm/router.py
index fb3250367b..faa7cd4a6b 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -3112,7 +3112,6 @@ class Router:
                 deployment_num_retries, int
             ):
                 num_retries = deployment_num_retries
-
             """
             Retry Logic
             """
@@ -3149,6 +3148,9 @@ class Router:
                     else:
                         raise
 
+            verbose_router_logger.info(
+                f"Retrying request with num_retries: {num_retries}"
+            )
             # decides how long to sleep before retry
             retry_after = self._time_to_sleep_before_retry(
                 e=original_exception,
diff --git a/litellm/router_strategy/lowest_tpm_rpm_v2.py b/litellm/router_strategy/lowest_tpm_rpm_v2.py
index d372814cbc..64f086036b 100644
--- a/litellm/router_strategy/lowest_tpm_rpm_v2.py
+++ b/litellm/router_strategy/lowest_tpm_rpm_v2.py
@@ -145,7 +145,6 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
         Raises - RateLimitError if deployment over defined RPM limit
         """
         try:
-
             # ------------
             # Setup values
             # ------------
@@ -183,6 +182,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                     headers={"retry-after": str(60)},  # type: ignore
                     request=httpx.Request(method="tpm_rpm_limits", url="https://github.com/BerriAI/litellm"),  # type: ignore
                 ),
+                num_retries=deployment.get("num_retries"),
             )
         else:  # if local result below limit, check redis ## prevent unnecessary redis checks
@@ -209,8 +209,8 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                         headers={"retry-after": str(60)},  # type: ignore
                         request=httpx.Request(method="tpm_rpm_limits", url="https://github.com/BerriAI/litellm"),  # type: ignore
                     ),
+                    num_retries=deployment.get("num_retries"),
                 )
-
             return deployment
         except Exception as e:
             if isinstance(e, litellm.RateLimitError):
@@ -540,7 +540,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                 "rpm_limit": _deployment_rpm,
             }
             raise litellm.RateLimitError(
-                message=f"{RouterErrors.no_deployments_available.value}. 12345 Passed model={model_group}. Deployments={deployment_dict}",
+                message=f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}",
                 llm_provider="",
                 model=model_group,
                 response=httpx.Response(
diff --git a/tests/llm_translation/test_bedrock_completion.py b/tests/llm_translation/test_bedrock_completion.py
index e78f8d141c..dd59415443 100644
--- a/tests/llm_translation/test_bedrock_completion.py
+++ b/tests/llm_translation/test_bedrock_completion.py
@@ -733,7 +733,7 @@ def test_bedrock_stop_value(stop, model):
     "model",
     [
         "anthropic.claude-3-sonnet-20240229-v1:0",
-        "meta.llama3-70b-instruct-v1:0",
+        # "meta.llama3-70b-instruct-v1:0",
         "anthropic.claude-v2",
         "mistral.mixtral-8x7b-instruct-v0:1",
    ],
diff --git a/tests/local_testing/test_custom_callback_input.py b/tests/local_testing/test_custom_callback_input.py
index 9630896a52..8343b63c9d 100644
--- a/tests/local_testing/test_custom_callback_input.py
+++ b/tests/local_testing/test_custom_callback_input.py
@@ -1683,3 +1683,32 @@ def test_standard_logging_retries():
             "standard_logging_object"
         ]["trace_id"]
     )
+
+
+@pytest.mark.parametrize("disable_no_log_param", [True, False])
+def test_litellm_logging_no_log_param(monkeypatch, disable_no_log_param):
+    monkeypatch.setattr(litellm, "global_disable_no_log_param", disable_no_log_param)
+    from litellm.litellm_core_utils.litellm_logging import Logging
+
+    litellm.success_callback = ["langfuse"]
+    litellm_call_id = "my-unique-call-id"
+    litellm_logging_obj = Logging(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "hi"}],
+        stream=False,
+        call_type="acompletion",
+        litellm_call_id=litellm_call_id,
+        start_time=datetime.now(),
+        function_id="1234",
+    )
+
+    should_run = litellm_logging_obj.should_run_callback(
+        callback="langfuse",
+        litellm_params={"no-log": True},
+        event_hook="success_handler",
+    )
+
+    if disable_no_log_param:
+        assert should_run is True
+    else:
+        assert should_run is False
diff --git a/tests/logging_callback_tests/test_custom_guardrail.py b/tests/logging_callback_tests/test_custom_guardrail.py
index 5a80154735..ae8b8c0c1d 100644
--- a/tests/logging_callback_tests/test_custom_guardrail.py
+++ b/tests/logging_callback_tests/test_custom_guardrail.py
@@ -228,3 +228,11 @@ def test_default_on_guardrail():
         )
         == True
     )
+
+    assert (
+        guardrail.should_run_guardrail(
+            {"metadata": {"guardrails": []}},
+            GuardrailEventHooks.pre_call,
+        )
+        == True
+    )
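
Reviewer note (not part of the patch): the new `Logging.should_run_callback` helper centralizes the no-log gate. Its decision order is: the `litellm.global_disable_no_log_param` kill switch forces logging on; otherwise, when the request sets `no-log`, only proxy cost-tracking callbacks (`CustomLogger` subclasses with `_PROXY_` in the class name) still run. A minimal usage sketch, mirroring the unit test added above:

```python
# Illustrative sketch only. Constructor arguments are copied from
# test_litellm_logging_no_log_param in this patch.
from datetime import datetime

import litellm
from litellm.litellm_core_utils.litellm_logging import Logging

logging_obj = Logging(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    stream=False,
    call_type="acompletion",
    litellm_call_id="example-call-id",
    start_time=datetime.now(),
    function_id="1234",
)

# Request sent with `no-log`: a regular callback such as "langfuse" is skipped.
assert logging_obj.should_run_callback(
    callback="langfuse",
    litellm_params={"no-log": True},
    event_hook="success_handler",
) is False

# With the new global setting enabled, `no-log` is ignored and logging runs.
litellm.global_disable_no_log_param = True
assert logging_obj.should_run_callback(
    callback="langfuse",
    litellm_params={"no-log": True},
    event_hook="success_handler",
) is True
```

On the proxy, the same switch is exposed through config.yaml (`litellm_settings: global_disable_no_log_param: true`), as the logging.md change above documents.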