Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 11:14:04 +00:00
Litellm dev 01 30 2025 p2 (#8134)

* feat(lowest_tpm_rpm_v2.py): fix redis cache check to use `>=` instead of `>`, making it consistent
* test(test_custom_guardrails.py): add more unit testing on default-on guardrails; ensure they run if the user-sent guardrail list is empty
* docs(quick_start.md): clarify that default-on guardrails run even if the user's guardrails list contains other guardrails
* refactor(litellm_logging.py): refactor no-log into a helper util, allowing for more consistent behavior
* feat(litellm_logging.py): add event hook to verbose logs
* fix(litellm_logging.py): add unit testing to ensure `litellm.disable_no_log_param` is respected
* docs(logging.md): document how to disable the 'no-log' param
* test: fix test to handle February
* test: clean up old bedrock model
* fix: fix router check
Parent: 78a21b66a2
Commit: 2eee7f978f

10 changed files with 103 additions and 30 deletions
@@ -125,6 +125,8 @@ curl -i http://localhost:4000/v1/chat/completions \
 
 Set `default_on: true` in your guardrail config to run the guardrail on every request. This is useful if you want to run a guardrail on every request without the user having to specify it.
 
+**Note:** These will run even if user specifies a different guardrail or empty guardrails array.
+
 ```yaml
 guardrails:
   - guardrail_name: "aporia-pre-guard"
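For reference, a minimal sketch of what the documented default-on behavior means at the code level, modeled on the `test_custom_guardrails.py` change later in this commit; the constructor arguments here are assumptions for illustration, not a definitive API:

```python
# Hedged sketch: constructor kwargs are assumed for illustration.
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.types.guardrails import GuardrailEventHooks

guardrail = CustomGuardrail(
    guardrail_name="aporia-pre-guard",        # name taken from the docs example above
    event_hook=GuardrailEventHooks.pre_call,
    default_on=True,                          # config equivalent: `default_on: true`
)

# A default-on guardrail should run even when the request carries an empty guardrails list.
assert guardrail.should_run_guardrail(
    {"metadata": {"guardrails": []}},
    GuardrailEventHooks.pre_call,
)
```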
@@ -116,6 +116,16 @@ Removes any field with `user_api_key_*` from metadata.
 
 For some use cases, you may want to turn off all tracking/logging. You can do this by passing `no-log=True` in the request body.
 
+:::info
+
+Disable this by setting `global_disable_no_log_param:true` in your config.yaml file.
+
+```yaml
+litellm_settings:
+  global_disable_no_log_param: True
+```
+:::
+
 <Tabs>
 <TabItem value="Curl" label="Curl Request">
 
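To make the docs change concrete, here is a hedged client-side sketch of passing the `no-log` flag through an OpenAI-compatible client pointed at the proxy; the base URL, API key, and model are placeholders:

```python
# Placeholder base_url / api_key / model; `extra_body` forwards extra fields to the proxy.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    extra_body={"no-log": True},  # ask the proxy to skip tracking/logging for this request
)
```

With `global_disable_no_log_param: True` set under `litellm_settings`, the proxy ignores this flag and logs the request anyway.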
@@ -51,11 +51,12 @@ if set_verbose == True:
     _turn_on_debug()
 ###############################################
 ### Callbacks /Logging / Success / Failure Handlers #####
+CALLBACK_TYPES = Union[str, Callable, CustomLogger]
+input_callback: List[CALLBACK_TYPES] = []
+success_callback: List[CALLBACK_TYPES] = []
+failure_callback: List[CALLBACK_TYPES] = []
+service_callback: List[CALLBACK_TYPES] = []
 logging_callback_manager = LoggingCallbackManager()
-input_callback: List[Union[str, Callable, CustomLogger]] = []
-success_callback: List[Union[str, Callable, CustomLogger]] = []
-failure_callback: List[Union[str, Callable, CustomLogger]] = []
-service_callback: List[Union[str, Callable, CustomLogger]] = []
 _custom_logger_compatible_callbacks_literal = Literal[
     "lago",
     "openmeter",
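The new `CALLBACK_TYPES` alias only names the union these lists already accepted; registering a plain callable still works the same way. A hedged sketch (the callback body is purely illustrative):

```python
# Hedged sketch: a custom success callback is any callable appended to litellm.success_callback.
import litellm


def log_cost(kwargs, completion_response, start_time, end_time):
    # kwargs carries per-request metadata; printing the cost here is illustrative only
    print("response_cost:", kwargs.get("response_cost"))


litellm.success_callback = [log_cost]
```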
@@ -1276,3 +1277,4 @@ custom_provider_map: List[CustomLLMItem] = []
 _custom_providers: List[str] = (
     []
 )  # internal helper util, used to track names of custom providers
+global_disable_no_log_param: bool = False
@@ -867,6 +867,26 @@ class Logging(LiteLLMLoggingBaseClass):
 
         return None
 
+    def should_run_callback(
+        self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str
+    ) -> bool:
+
+        if litellm.global_disable_no_log_param:
+            return True
+
+        if litellm_params.get("no-log", False) is True:
+            # proxy cost tracking cal backs should run
+
+            if not (
+                isinstance(callback, CustomLogger)
+                and "_PROXY_" in callback.__class__.__name__
+            ):
+                verbose_logger.debug(
+                    f"no-log request, skipping logging for {event_hook} event"
+                )
+                return False
+        return True
+
     def _success_handler_helper_fn(
         self,
         result=None,
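A stand-alone harness showing the rule the helper encodes: on a `no-log` request, only proxy cost-tracking callbacks (CustomLogger subclasses whose class name contains `_PROXY_`) keep running. The `_PROXY_TrackCostLogger` class below is a stand-in invented for this sketch, not a real LiteLLM class:

```python
# Illustrative harness; _PROXY_TrackCostLogger is a made-up stand-in class.
from litellm.integrations.custom_logger import CustomLogger


class _PROXY_TrackCostLogger(CustomLogger):
    """Stand-in for a proxy cost-tracking callback."""


def runs_on_no_log_request(callback) -> bool:
    # Mirrors the check in should_run_callback() when litellm_params["no-log"] is True
    return isinstance(callback, CustomLogger) and "_PROXY_" in type(callback).__name__


assert runs_on_no_log_request(_PROXY_TrackCostLogger()) is True   # proxy cost tracking still runs
assert runs_on_no_log_request("langfuse") is False                # ordinary callbacks are skipped
```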
@@ -1072,13 +1092,12 @@ class Logging(LiteLLMLoggingBaseClass):
         for callback in callbacks:
             try:
                 litellm_params = self.model_call_details.get("litellm_params", {})
-                if litellm_params.get("no-log", False) is True:
-                    # proxy cost tracking cal backs should run
-                    if not (
-                        isinstance(callback, CustomLogger)
-                        and "_PROXY_" in callback.__class__.__name__
-                    ):
-                        verbose_logger.info("no-log request, skipping logging")
-                        continue
+                should_run = self.should_run_callback(
+                    callback=callback,
+                    litellm_params=litellm_params,
+                    event_hook="success_handler",
+                )
+                if not should_run:
+                    continue
                 if callback == "promptlayer" and promptLayerLogger is not None:
                     print_verbose("reaches promptlayer for logging!")
@@ -1626,18 +1645,14 @@ class Logging(LiteLLMLoggingBaseClass):
         for callback in callbacks:
             # check if callback can run for this request
             litellm_params = self.model_call_details.get("litellm_params", {})
-            if litellm_params.get("no-log", False) is True:
-                # proxy cost tracking cal backs should run
-                if not (
-                    isinstance(callback, CustomLogger)
-                    and "_PROXY_" in callback.__class__.__name__
-                ):
-                    print_verbose("no-log request, skipping logging")
-                    continue
+            should_run = self.should_run_callback(
+                callback=callback,
+                litellm_params=litellm_params,
+                event_hook="async_success_handler",
+            )
+            if not should_run:
+                continue
             try:
-                if kwargs.get("no-log", False) is True:
-                    print_verbose("no-log request, skipping logging")
-                    continue
                 if callback == "openmeter" and openMeterLogger is not None:
                     if self.stream is True:
                         if (
@@ -1,7 +1,8 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: gpt-3.5-turbo-testing
     litellm_params:
       model: gpt-3.5-turbo
+      rpm: 3
   - model_name: anthropic-claude
     litellm_params:
       model: claude-3-5-haiku-20241022
@@ -14,3 +15,7 @@ model_list:
     litellm_params:
       model: deepseek/*
       api_key: os.environ/DEEPSEEK_API_KEY
+
+litellm_settings:
+  callbacks: ["langsmith"]
+  disable_no_log_param: true
@@ -3112,7 +3112,6 @@ class Router:
                 deployment_num_retries, int
             ):
                 num_retries = deployment_num_retries
-
             """
             Retry Logic
             """
@@ -3149,6 +3148,9 @@ class Router:
                 else:
                     raise
 
+            verbose_router_logger.info(
+                f"Retrying request with num_retries: {num_retries}"
+            )
             # decides how long to sleep before retry
             retry_after = self._time_to_sleep_before_retry(
                 e=original_exception,
@@ -145,7 +145,6 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
         Raises - RateLimitError if deployment over defined RPM limit
         """
         try:
-
             # ------------
             # Setup values
             # ------------
@@ -183,6 +182,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                         headers={"retry-after": str(60)},  # type: ignore
                         request=httpx.Request(method="tpm_rpm_limits", url="https://github.com/BerriAI/litellm"),  # type: ignore
                     ),
+                    num_retries=deployment.get("num_retries"),
                 )
             else:
                 # if local result below limit, check redis ## prevent unnecessary redis checks
@@ -209,8 +209,8 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                         headers={"retry-after": str(60)},  # type: ignore
                         request=httpx.Request(method="tpm_rpm_limits", url="https://github.com/BerriAI/litellm"),  # type: ignore
                     ),
+                    num_retries=deployment.get("num_retries"),
                 )
-
             return deployment
         except Exception as e:
             if isinstance(e, litellm.RateLimitError):
@@ -540,7 +540,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                 "rpm_limit": _deployment_rpm,
             }
             raise litellm.RateLimitError(
-                message=f"{RouterErrors.no_deployments_available.value}. 12345 Passed model={model_group}. Deployments={deployment_dict}",
+                message=f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}",
                 llm_provider="",
                 model=model_group,
                 response=httpx.Response(
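The first bullet of the commit message (the Redis-side check moving from `>` to `>=`) does not appear in the hunks shown here; a tiny illustrative example of why the boundary matters, using made-up numbers:

```python
# Illustrative numbers only: at exactly the limit, `>` lets one extra request through,
# while `>=` flags the limit, matching the local (in-memory) check.
rpm_limit = 3
current_rpm = 3  # requests already counted for this minute

assert not (current_rpm > rpm_limit)  # strict check: limit not treated as reached
assert current_rpm >= rpm_limit       # inclusive check: limit treated as reached
```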
@@ -733,7 +733,7 @@ def test_bedrock_stop_value(stop, model):
     "model",
     [
         "anthropic.claude-3-sonnet-20240229-v1:0",
-        "meta.llama3-70b-instruct-v1:0",
+        # "meta.llama3-70b-instruct-v1:0",
         "anthropic.claude-v2",
         "mistral.mixtral-8x7b-instruct-v0:1",
     ],
@@ -1683,3 +1683,32 @@ def test_standard_logging_retries():
                 "standard_logging_object"
             ]["trace_id"]
         )
+
+
+@pytest.mark.parametrize("disable_no_log_param", [True, False])
+def test_litellm_logging_no_log_param(monkeypatch, disable_no_log_param):
+    monkeypatch.setattr(litellm, "global_disable_no_log_param", disable_no_log_param)
+    from litellm.litellm_core_utils.litellm_logging import Logging
+
+    litellm.success_callback = ["langfuse"]
+    litellm_call_id = "my-unique-call-id"
+    litellm_logging_obj = Logging(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "hi"}],
+        stream=False,
+        call_type="acompletion",
+        litellm_call_id=litellm_call_id,
+        start_time=datetime.now(),
+        function_id="1234",
+    )
+
+    should_run = litellm_logging_obj.should_run_callback(
+        callback="langfuse",
+        litellm_params={"no-log": True},
+        event_hook="success_handler",
+    )
+
+    if disable_no_log_param:
+        assert should_run is True
+    else:
+        assert should_run is False
@@ -228,3 +228,11 @@ def test_default_on_guardrail():
         )
         == True
     )
+
+    assert (
+        guardrail.should_run_guardrail(
+            {"metadata": {"guardrails": []}},
+            GuardrailEventHooks.pre_call,
+        )
+        == True
+    )