Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 11:14:04 +00:00
Litellm dev 01 30 2025 p2 (#8134)

* feat(lowest_tpm_rpm_v2.py): fix redis cache check to use `>=` instead of `>`, making it consistent
* test(test_custom_guardrails.py): add more unit testing on default-on guardrails; ensure they run if the user-sent guardrail list is empty
* docs(quick_start.md): clarify that default-on guardrails run even if the user's guardrails list contains other guardrails
* refactor(litellm_logging.py): refactor no-log into a helper util, allowing for more consistent behavior
* feat(litellm_logging.py): add event hook to verbose logs
* fix(litellm_logging.py): add unit testing to ensure `litellm.disable_no_log_param` is respected
* docs(logging.md): document how to disable the 'no-log' param
* test: fix test to handle February
* test: clean up old bedrock model
* fix: fix router check
Parent: 78a21b66a2
Commit: 2eee7f978f

10 changed files with 103 additions and 30 deletions
@@ -125,6 +125,8 @@ curl -i http://localhost:4000/v1/chat/completions \
 
 Set `default_on: true` in your guardrail config to run the guardrail on every request. This is useful if you want to run a guardrail on every request without the user having to specify it.
 
+**Note:** These will run even if user specifies a different guardrail or empty guardrails array.
+
 ```yaml
 guardrails:
   - guardrail_name: "aporia-pre-guard"
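For reference, a minimal sketch of what the documented default-on behavior means at the code level, modeled on the `test_custom_guardrails.py` change later in this commit; the constructor arguments here are assumptions for illustration, not a definitive API:

```python
# Hedged sketch: constructor kwargs are assumed for illustration.
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.types.guardrails import GuardrailEventHooks

guardrail = CustomGuardrail(
    guardrail_name="aporia-pre-guard",        # name taken from the docs example above
    event_hook=GuardrailEventHooks.pre_call,
    default_on=True,                          # config equivalent: `default_on: true`
)

# A default-on guardrail should run even when the request carries an empty guardrails list.
assert guardrail.should_run_guardrail(
    {"metadata": {"guardrails": []}},
    GuardrailEventHooks.pre_call,
)
```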
@@ -116,6 +116,16 @@ Removes any field with `user_api_key_*` from metadata.
 
 For some use cases, you may want to turn off all tracking/logging. You can do this by passing `no-log=True` in the request body.
 
+:::info
+
+Disable this by setting `global_disable_no_log_param:true` in your config.yaml file.
+
+```yaml
+litellm_settings:
+  global_disable_no_log_param: True
+```
+:::
+
 <Tabs>
 <TabItem value="Curl" label="Curl Request">
 
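To make the docs change concrete, here is a hedged client-side sketch of passing the `no-log` flag through an OpenAI-compatible client pointed at the proxy; the base URL, API key, and model are placeholders:

```python
# Placeholder base_url / api_key / model; `extra_body` forwards extra fields to the proxy.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    extra_body={"no-log": True},  # ask the proxy to skip tracking/logging for this request
)
```

With `global_disable_no_log_param: True` set under `litellm_settings`, the proxy ignores this flag and logs the request anyway.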
@@ -51,11 +51,12 @@ if set_verbose == True:
     _turn_on_debug()
 ###############################################
 ### Callbacks /Logging / Success / Failure Handlers #####
+CALLBACK_TYPES = Union[str, Callable, CustomLogger]
+input_callback: List[CALLBACK_TYPES] = []
+success_callback: List[CALLBACK_TYPES] = []
+failure_callback: List[CALLBACK_TYPES] = []
+service_callback: List[CALLBACK_TYPES] = []
 logging_callback_manager = LoggingCallbackManager()
-input_callback: List[Union[str, Callable, CustomLogger]] = []
-success_callback: List[Union[str, Callable, CustomLogger]] = []
-failure_callback: List[Union[str, Callable, CustomLogger]] = []
-service_callback: List[Union[str, Callable, CustomLogger]] = []
 _custom_logger_compatible_callbacks_literal = Literal[
     "lago",
     "openmeter",
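The new `CALLBACK_TYPES` alias only names the union these lists already accepted; registering a plain callable still works the same way. A hedged sketch (the callback body is purely illustrative):

```python
# Hedged sketch: a custom success callback is any callable appended to litellm.success_callback.
import litellm


def log_cost(kwargs, completion_response, start_time, end_time):
    # kwargs carries per-request metadata; printing the cost here is illustrative only
    print("response_cost:", kwargs.get("response_cost"))


litellm.success_callback = [log_cost]
```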
@@ -1276,3 +1277,4 @@ custom_provider_map: List[CustomLLMItem] = []
 _custom_providers: List[str] = (
     []
 )  # internal helper util, used to track names of custom providers
+global_disable_no_log_param: bool = False
@@ -867,6 +867,26 @@ class Logging(LiteLLMLoggingBaseClass):
 
         return None
 
+    def should_run_callback(
+        self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str
+    ) -> bool:
+
+        if litellm.global_disable_no_log_param:
+            return True
+
+        if litellm_params.get("no-log", False) is True:
+            # proxy cost tracking cal backs should run
+
+            if not (
+                isinstance(callback, CustomLogger)
+                and "_PROXY_" in callback.__class__.__name__
+            ):
+                verbose_logger.debug(
+                    f"no-log request, skipping logging for {event_hook} event"
+                )
+                return False
+        return True
+
     def _success_handler_helper_fn(
         self,
         result=None,
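A stand-alone harness showing the rule the helper encodes: on a `no-log` request, only proxy cost-tracking callbacks (CustomLogger subclasses whose class name contains `_PROXY_`) keep running. The `_PROXY_TrackCostLogger` class below is a stand-in invented for this sketch, not a real LiteLLM class:

```python
# Illustrative harness; _PROXY_TrackCostLogger is a made-up stand-in class.
from litellm.integrations.custom_logger import CustomLogger


class _PROXY_TrackCostLogger(CustomLogger):
    """Stand-in for a proxy cost-tracking callback."""


def runs_on_no_log_request(callback) -> bool:
    # Mirrors the check in should_run_callback() when litellm_params["no-log"] is True
    return isinstance(callback, CustomLogger) and "_PROXY_" in type(callback).__name__


assert runs_on_no_log_request(_PROXY_TrackCostLogger()) is True   # proxy cost tracking still runs
assert runs_on_no_log_request("langfuse") is False                # ordinary callbacks are skipped
```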
@@ -1072,13 +1092,12 @@ class Logging(LiteLLMLoggingBaseClass):
         for callback in callbacks:
             try:
                 litellm_params = self.model_call_details.get("litellm_params", {})
-                if litellm_params.get("no-log", False) is True:
-                    # proxy cost tracking cal backs should run
-                    if not (
-                        isinstance(callback, CustomLogger)
-                        and "_PROXY_" in callback.__class__.__name__
-                    ):
-                        verbose_logger.info("no-log request, skipping logging")
-                        continue
+                should_run = self.should_run_callback(
+                    callback=callback,
+                    litellm_params=litellm_params,
+                    event_hook="success_handler",
+                )
+                if not should_run:
+                    continue
                 if callback == "promptlayer" and promptLayerLogger is not None:
                     print_verbose("reaches promptlayer for logging!")
@@ -1626,18 +1645,14 @@ class Logging(LiteLLMLoggingBaseClass):
         for callback in callbacks:
             # check if callback can run for this request
             litellm_params = self.model_call_details.get("litellm_params", {})
-            if litellm_params.get("no-log", False) is True:
-                # proxy cost tracking cal backs should run
-                if not (
-                    isinstance(callback, CustomLogger)
-                    and "_PROXY_" in callback.__class__.__name__
-                ):
-                    print_verbose("no-log request, skipping logging")
-                    continue
+            should_run = self.should_run_callback(
+                callback=callback,
+                litellm_params=litellm_params,
+                event_hook="async_success_handler",
+            )
+            if not should_run:
+                continue
             try:
-                if kwargs.get("no-log", False) is True:
-                    print_verbose("no-log request, skipping logging")
-                    continue
                 if callback == "openmeter" and openMeterLogger is not None:
                     if self.stream is True:
                         if (
@@ -1,7 +1,8 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: gpt-3.5-turbo-testing
     litellm_params:
       model: gpt-3.5-turbo
+      rpm: 3
   - model_name: anthropic-claude
     litellm_params:
       model: claude-3-5-haiku-20241022
@@ -14,3 +15,7 @@ model_list:
     litellm_params:
       model: deepseek/*
       api_key: os.environ/DEEPSEEK_API_KEY
+
+litellm_settings:
+  callbacks: ["langsmith"]
+  disable_no_log_param: true
@@ -3112,7 +3112,6 @@ class Router:
                 deployment_num_retries, int
             ):
                 num_retries = deployment_num_retries
-
             """
             Retry Logic
             """
@@ -3149,6 +3148,9 @@ class Router:
                 else:
                     raise
 
+            verbose_router_logger.info(
+                f"Retrying request with num_retries: {num_retries}"
+            )
             # decides how long to sleep before retry
             retry_after = self._time_to_sleep_before_retry(
                 e=original_exception,
@@ -145,7 +145,6 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
         Raises - RateLimitError if deployment over defined RPM limit
         """
         try:
-
             # ------------
             # Setup values
             # ------------
@@ -183,6 +182,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                         headers={"retry-after": str(60)},  # type: ignore
                         request=httpx.Request(method="tpm_rpm_limits", url="https://github.com/BerriAI/litellm"),  # type: ignore
                     ),
+                    num_retries=deployment.get("num_retries"),
                 )
             else:
                 # if local result below limit, check redis ## prevent unnecessary redis checks
@@ -209,8 +209,8 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                         headers={"retry-after": str(60)},  # type: ignore
                         request=httpx.Request(method="tpm_rpm_limits", url="https://github.com/BerriAI/litellm"),  # type: ignore
                     ),
+                    num_retries=deployment.get("num_retries"),
                 )
-
             return deployment
         except Exception as e:
             if isinstance(e, litellm.RateLimitError):
@@ -540,7 +540,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                 "rpm_limit": _deployment_rpm,
             }
             raise litellm.RateLimitError(
-                message=f"{RouterErrors.no_deployments_available.value}. 12345 Passed model={model_group}. Deployments={deployment_dict}",
+                message=f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}",
                 llm_provider="",
                 model=model_group,
                 response=httpx.Response(
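The first bullet of the commit message (the Redis-side check moving from `>` to `>=`) does not appear in the hunks shown here; a tiny illustrative example of why the boundary matters, using made-up numbers:

```python
# Illustrative numbers only: at exactly the limit, `>` lets one extra request through,
# while `>=` flags the limit, matching the local (in-memory) check.
rpm_limit = 3
current_rpm = 3  # requests already counted for this minute

assert not (current_rpm > rpm_limit)  # strict check: limit not treated as reached
assert current_rpm >= rpm_limit       # inclusive check: limit treated as reached
```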
@@ -733,7 +733,7 @@ def test_bedrock_stop_value(stop, model):
     "model",
     [
         "anthropic.claude-3-sonnet-20240229-v1:0",
-        "meta.llama3-70b-instruct-v1:0",
+        # "meta.llama3-70b-instruct-v1:0",
         "anthropic.claude-v2",
         "mistral.mixtral-8x7b-instruct-v0:1",
     ],
@@ -1683,3 +1683,32 @@ def test_standard_logging_retries():
                 "standard_logging_object"
             ]["trace_id"]
         )
+
+
+@pytest.mark.parametrize("disable_no_log_param", [True, False])
+def test_litellm_logging_no_log_param(monkeypatch, disable_no_log_param):
+    monkeypatch.setattr(litellm, "global_disable_no_log_param", disable_no_log_param)
+    from litellm.litellm_core_utils.litellm_logging import Logging
+
+    litellm.success_callback = ["langfuse"]
+    litellm_call_id = "my-unique-call-id"
+    litellm_logging_obj = Logging(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "user", "content": "hi"}],
+        stream=False,
+        call_type="acompletion",
+        litellm_call_id=litellm_call_id,
+        start_time=datetime.now(),
+        function_id="1234",
+    )
+
+    should_run = litellm_logging_obj.should_run_callback(
+        callback="langfuse",
+        litellm_params={"no-log": True},
+        event_hook="success_handler",
+    )
+
+    if disable_no_log_param:
+        assert should_run is True
+    else:
+        assert should_run is False
@@ -228,3 +228,11 @@ def test_default_on_guardrail():
         )
         == True
     )
+
+    assert (
+        guardrail.should_run_guardrail(
+            {"metadata": {"guardrails": []}},
+            GuardrailEventHooks.pre_call,
+        )
+        == True
+    )