Litellm dev 01 30 2025 p2 (#8134)

* feat(lowest_tpm_rpm_v2.py): fix redis cache check to use >= instead of >

makes the Redis check consistent with the local in-memory limit check

* test(test_custom_guardrails.py): add more unit tests for default-on guardrails

ensure they run even when the user-sent guardrail list is empty

* docs(quick_start.md): clarify that default-on guardrails run even if the user's guardrails list contains other guardrails

* refactor(litellm_logging.py): refactor the 'no-log' check into a helper util

allows for more consistent behavior across the sync and async success handlers

* feat(litellm_logging.py): add event hook to verbose logs

* fix(litellm_logging.py): add unit testing to ensure `litellm.global_disable_no_log_param` is respected

* docs(logging.md): document how to disable 'no-log' param

* test: fix test to handle February

* test: cleanup old bedrock model

* fix: fix router check
Krish Dholakia 2025-01-30 22:18:53 -08:00 committed by GitHub
parent 78a21b66a2
commit 2eee7f978f
10 changed files with 103 additions and 30 deletions


@@ -125,6 +125,8 @@ curl -i http://localhost:4000/v1/chat/completions \
 Set `default_on: true` in your guardrail config to run the guardrail on every request. This is useful if you want to run a guardrail on every request without the user having to specify it.
+
+**Note:** These will run even if user specifies a different guardrail or empty guardrails array.
 ```yaml
 guardrails:
   - guardrail_name: "aporia-pre-guard"
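For reference, the behavior this note describes can be checked directly against `CustomGuardrail`, which is what the updated unit test at the bottom of this commit does. A minimal sketch, assuming `CustomGuardrail` accepts `guardrail_name`, `event_hook`, and `default_on` keyword arguments as in litellm's own guardrail tests (this is not the proxy request path):

```python
# Minimal sketch: a default-on guardrail runs even when the request's
# guardrail list is empty. Constructor kwargs are assumed from litellm's
# guardrail tests, not a guaranteed public API.
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.types.guardrails import GuardrailEventHooks

guardrail = CustomGuardrail(
    guardrail_name="aporia-pre-guard",
    event_hook=GuardrailEventHooks.pre_call,
    default_on=True,
)

# user sent an empty guardrails list -> default-on guardrail still runs
assert guardrail.should_run_guardrail(
    {"metadata": {"guardrails": []}},
    GuardrailEventHooks.pre_call,
)
```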


@@ -116,6 +116,16 @@ Removes any field with `user_api_key_*` from metadata.
 For some use cases, you may want to turn off all tracking/logging. You can do this by passing `no-log=True` in the request body.
+
+:::info
+Disable this by setting `global_disable_no_log_param:true` in your config.yaml file.
+
+```yaml
+litellm_settings:
+  global_disable_no_log_param: True
+```
+
+:::
 <Tabs>
 <TabItem value="Curl" label="Curl Request">
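The docs' surrounding tabs show the curl request; as a rough client-side sketch, the same `no-log` flag can be sent from the OpenAI Python SDK via `extra_body`. The endpoint, key, and model below are placeholders, and using `extra_body` is an assumption that mirrors the curl request body:

```python
# Sketch: opt a single request out of logging through a locally running
# LiteLLM proxy. base_url, api_key, and model are placeholders; passing
# `no-log` via extra_body mirrors the curl body shown in the docs.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    extra_body={"no-log": True},  # ignored once global_disable_no_log_param is set
)
print(response.choices[0].message.content)
```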


@@ -51,11 +51,12 @@ if set_verbose == True:
     _turn_on_debug()
 ###############################################
 ### Callbacks /Logging / Success / Failure Handlers #####
+CALLBACK_TYPES = Union[str, Callable, CustomLogger]
+input_callback: List[CALLBACK_TYPES] = []
+success_callback: List[CALLBACK_TYPES] = []
+failure_callback: List[CALLBACK_TYPES] = []
+service_callback: List[CALLBACK_TYPES] = []
 logging_callback_manager = LoggingCallbackManager()
-input_callback: List[Union[str, Callable, CustomLogger]] = []
-success_callback: List[Union[str, Callable, CustomLogger]] = []
-failure_callback: List[Union[str, Callable, CustomLogger]] = []
-service_callback: List[Union[str, Callable, CustomLogger]] = []
 _custom_logger_compatible_callbacks_literal = Literal[
     "lago",
     "openmeter",
@@ -1276,3 +1277,4 @@ custom_provider_map: List[CustomLLMItem] = []
 _custom_providers: List[str] = (
     []
 )  # internal helper util, used to track names of custom providers
+global_disable_no_log_param: bool = False
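The `CALLBACK_TYPES` alias simply names the `Union[str, Callable, CustomLogger]` the callback lists already used, and `global_disable_no_log_param` is the new module-level switch for ignoring per-request `no-log`. A small sketch of both; `track_cost` is a made-up callback used only for illustration:

```python
# Sketch: annotate helper code against litellm.CALLBACK_TYPES instead of
# repeating Union[str, Callable, CustomLogger]. `track_cost` is a made-up
# success callback, not part of litellm.
import litellm


def track_cost(kwargs, completion_response, start_time, end_time):
    print("response_cost:", kwargs.get("response_cost"))


cb: litellm.CALLBACK_TYPES = track_cost  # a str, Callable, or CustomLogger
litellm.success_callback.append(cb)

# New module-level flag: when True, per-request `no-log` is ignored and
# callbacks always run.
litellm.global_disable_no_log_param = True
```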


@@ -867,6 +867,26 @@ class Logging(LiteLLMLoggingBaseClass):
                 return None

+    def should_run_callback(
+        self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str
+    ) -> bool:
+        if litellm.global_disable_no_log_param:
+            return True
+
+        if litellm_params.get("no-log", False) is True:
+            # proxy cost tracking callbacks should run
+            if not (
+                isinstance(callback, CustomLogger)
+                and "_PROXY_" in callback.__class__.__name__
+            ):
+                verbose_logger.debug(
+                    f"no-log request, skipping logging for {event_hook} event"
+                )
+                return False
+        return True
+
     def _success_handler_helper_fn(
         self,
         result=None,
@@ -1072,14 +1092,13 @@ class Logging(LiteLLMLoggingBaseClass):
             for callback in callbacks:
                 try:
                     litellm_params = self.model_call_details.get("litellm_params", {})
-                    if litellm_params.get("no-log", False) is True:
-                        # proxy cost tracking callbacks should run
-                        if not (
-                            isinstance(callback, CustomLogger)
-                            and "_PROXY_" in callback.__class__.__name__
-                        ):
-                            verbose_logger.info("no-log request, skipping logging")
-                            continue
+                    should_run = self.should_run_callback(
+                        callback=callback,
+                        litellm_params=litellm_params,
+                        event_hook="success_handler",
+                    )
+                    if not should_run:
+                        continue
                     if callback == "promptlayer" and promptLayerLogger is not None:
                         print_verbose("reaches promptlayer for logging!")
                         promptLayerLogger.log_event(
@@ -1626,18 +1645,14 @@ class Logging(LiteLLMLoggingBaseClass):
         for callback in callbacks:
             # check if callback can run for this request
             litellm_params = self.model_call_details.get("litellm_params", {})
-            if litellm_params.get("no-log", False) is True:
-                # proxy cost tracking callbacks should run
-                if not (
-                    isinstance(callback, CustomLogger)
-                    and "_PROXY_" in callback.__class__.__name__
-                ):
-                    print_verbose("no-log request, skipping logging")
-                    continue
+            should_run = self.should_run_callback(
+                callback=callback,
+                litellm_params=litellm_params,
+                event_hook="async_success_handler",
+            )
+            if not should_run:
+                continue
             try:
-                if kwargs.get("no-log", False) is True:
-                    print_verbose("no-log request, skipping logging")
-                    continue
                 if callback == "openmeter" and openMeterLogger is not None:
                     if self.stream is True:
                         if (
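The new unit test further down covers the plain-callback case; the other branch of `should_run_callback` — proxy cost-tracking callbacks, identified by `_PROXY_` in the class name, still running on `no-log` requests — can be exercised the same way. A sketch, with a hypothetical `_PROXY_CostTracker` logger and `global_disable_no_log_param` left at its default of `False`:

```python
# Sketch: exercising the "_PROXY_" escape hatch in should_run_callback.
# _PROXY_CostTracker is a hypothetical logger name; only the "_PROXY_"
# substring in the class name matters here.
from datetime import datetime

from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.litellm_logging import Logging


class _PROXY_CostTracker(CustomLogger):
    pass


logging_obj = Logging(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    stream=False,
    call_type="acompletion",
    litellm_call_id="my-unique-call-id",
    start_time=datetime.now(),
    function_id="1234",
)

no_log_params = {"no-log": True}

# a plain callback is skipped for a no-log request...
assert logging_obj.should_run_callback("langfuse", no_log_params, "success_handler") is False
# ...but a proxy cost-tracking callback still runs
assert logging_obj.should_run_callback(_PROXY_CostTracker(), no_log_params, "success_handler") is True
```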


@@ -1,7 +1,8 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: gpt-3.5-turbo-testing
     litellm_params:
       model: gpt-3.5-turbo
+      rpm: 3
   - model_name: anthropic-claude
     litellm_params:
       model: claude-3-5-haiku-20241022
@@ -14,3 +15,7 @@ model_list:
     litellm_params:
       model: deepseek/*
       api_key: os.environ/DEEPSEEK_API_KEY
+
+litellm_settings:
+  callbacks: ["langsmith"]
+  disable_no_log_param: true


@@ -3112,7 +3112,6 @@
                     deployment_num_retries, int
                 ):
-                    num_retries = deployment_num_retries
                 """
                 Retry Logic
                 """
@@ -3149,6 +3148,9 @@
                     else:
                         raise
+                    verbose_router_logger.info(
+                        f"Retrying request with num_retries: {num_retries}"
+                    )
                     # decides how long to sleep before retry
                     retry_after = self._time_to_sleep_before_retry(
                         e=original_exception,
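The retry flow itself is unchanged here; the hunk only adds a log line so retries are visible. As a rough illustration of the pattern, and not the `Router` implementation, each failed attempt logs the configured retry budget, sleeps a computed backoff, and tries again:

```python
# Illustrative retry loop only -- not litellm.Router internals. It shows the
# pattern the new log line surfaces: announce num_retries, sleep a computed
# backoff, then retry. The backoff formula is a stand-in for
# Router._time_to_sleep_before_retry.
import logging
import time

logger = logging.getLogger("router")


def call_with_retries(fn, num_retries: int, base_backoff: float = 1.0):
    for attempt in range(num_retries + 1):
        try:
            return fn()
        except Exception:
            if attempt == num_retries:
                raise
            logger.info("Retrying request with num_retries: %s", num_retries)
            time.sleep(base_backoff * (2 ** attempt))
```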


@@ -145,7 +145,6 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
         Raises - RateLimitError if deployment over defined RPM limit
         """
         try:
-
             # ------------
             # Setup values
             # ------------
@@ -183,6 +182,7 @@
                         headers={"retry-after": str(60)},  # type: ignore
                         request=httpx.Request(method="tpm_rpm_limits", url="https://github.com/BerriAI/litellm"),  # type: ignore
                     ),
+                    num_retries=deployment.get("num_retries"),
                 )
             else:
                 # if local result below limit, check redis ## prevent unnecessary redis checks
@@ -209,8 +209,8 @@
                             headers={"retry-after": str(60)},  # type: ignore
                             request=httpx.Request(method="tpm_rpm_limits", url="https://github.com/BerriAI/litellm"),  # type: ignore
                         ),
+                        num_retries=deployment.get("num_retries"),
                     )
             return deployment
         except Exception as e:
             if isinstance(e, litellm.RateLimitError):
@@ -540,7 +540,7 @@
                 "rpm_limit": _deployment_rpm,
             }
             raise litellm.RateLimitError(
-                message=f"{RouterErrors.no_deployments_available.value}. 12345 Passed model={model_group}. Deployments={deployment_dict}",
+                message=f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}",
                 llm_provider="",
                 model=model_group,
                 response=httpx.Response(
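Per the first commit bullet, the Redis-side check now uses `>=` like the local check, so both paths treat "usage at the limit" as over the limit, and the raised `RateLimitError` now carries the deployment's `num_retries`. A simplified sketch of the two-stage comparison (not the handler's actual code; whether counts are pre- or post-increment is glossed over here):

```python
# Simplified sketch of the pre-call RPM gate, not the handler's actual code.
# Both stages now use ">=", so a deployment sitting exactly at its limit is
# rejected by the local and the Redis check alike.
from typing import Optional


def is_over_rpm_limit(local_count: int, redis_count: Optional[int], rpm_limit: int) -> bool:
    if local_count >= rpm_limit:  # local in-memory check
        return True
    if redis_count is not None and redis_count >= rpm_limit:  # redis check, same comparison
        return True
    return False


assert is_over_rpm_limit(local_count=3, redis_count=None, rpm_limit=3) is True
assert is_over_rpm_limit(local_count=1, redis_count=2, rpm_limit=3) is False
```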


@@ -733,7 +733,7 @@ def test_bedrock_stop_value(stop, model):
     "model",
     [
         "anthropic.claude-3-sonnet-20240229-v1:0",
-        "meta.llama3-70b-instruct-v1:0",
+        # "meta.llama3-70b-instruct-v1:0",
         "anthropic.claude-v2",
         "mistral.mixtral-8x7b-instruct-v0:1",
     ],


@@ -1683,3 +1683,32 @@ def test_standard_logging_retries():
             "standard_logging_object"
         ]["trace_id"]
     )
+
+
+@pytest.mark.parametrize("disable_no_log_param", [True, False])
+def test_litellm_logging_no_log_param(monkeypatch, disable_no_log_param):
+    monkeypatch.setattr(litellm, "global_disable_no_log_param", disable_no_log_param)
+    from litellm.litellm_core_utils.litellm_logging import Logging
+
+    litellm.success_callback = ["langfuse"]
+    litellm_call_id = "my-unique-call-id"
+
+    litellm_logging_obj = Logging(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "hi"}],
+        stream=False,
+        call_type="acompletion",
+        litellm_call_id=litellm_call_id,
+        start_time=datetime.now(),
+        function_id="1234",
+    )
+
+    should_run = litellm_logging_obj.should_run_callback(
+        callback="langfuse",
+        litellm_params={"no-log": True},
+        event_hook="success_handler",
+    )
+
+    if disable_no_log_param:
+        assert should_run is True
+    else:
+        assert should_run is False


@@ -228,3 +228,11 @@ def test_default_on_guardrail():
         )
         == True
     )
+
+    assert (
+        guardrail.should_run_guardrail(
+            {"metadata": {"guardrails": []}},
+            GuardrailEventHooks.pre_call,
+        )
+        == True
+    )