LiteLLM Minor fixes + improvements (08/04/2024) (#5505)
* Minor IAM AWS OIDC Improvements (#5246)
* AWS IAM: Temporary tokens are valid across all regions after being issued, so it is wasteful to request one for each region.
* AWS IAM: Include an inline session policy, to help reduce misuse of overly permissive IAM roles (sketched below, after this commit message).
* (test_bedrock_completion.py): Ensure we are testing the cross-region AWS OIDC flow.
* fix(router.py): log rejected requests
Fixes https://github.com/BerriAI/litellm/issues/5498
* refactor: don't call verbose_logger.exception when the exception is re-raised anyway
The user may already have handling for it; logging it here makes alerting systems in prod flag it as an unhandled error.
* fix(datadog.py): support setting the Datadog source as an env var (configuration sketch below)
Fixes https://github.com/BerriAI/litellm/issues/5508
* docs(logging.md): add dd_source to datadog docs
* fix(proxy_server.py): expose the `/customer/list` endpoint for showing all customers (request sketch below)
* (bedrock): Fix usage with Cloudflare AI Gateway, and proxies in general. (#5509)
* feat(anthropic.py): support the 'cache_control' param for message content when it is a plain string (sketch below)
* Revert "(bedrock): Fix usage with Cloudflare AI Gateway, and proxies in gener…" (#5519)
This reverts commit 3fac0349c2.
* refactor: ci/cd run again
---------
Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>
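
The inline-policy bullet above is worth illustrating. Below is a rough sketch of the idea using boto3's STS API; the role ARN, token path, and policy document are illustrative assumptions, not LiteLLM's actual values.

```python
import json

import boto3

# Illustrative only: a session policy passed at assume-role time can only
# *narrow* what the role already allows, so even an overly permissive IAM
# role yields credentials scoped to Bedrock invocation for this session.
SESSION_POLICY = {
    "Version": "2012-10-17",
    "Statement": [
        {"Effect": "Allow", "Action": "bedrock:InvokeModel*", "Resource": "*"}
    ],
}

sts = boto3.client("sts")
creds = sts.assume_role_with_web_identity(
    RoleArn="arn:aws:iam::123456789012:role/litellm-bedrock",  # placeholder ARN
    RoleSessionName="litellm-session",
    WebIdentityToken=open("/var/run/secrets/oidc/token").read(),  # placeholder path
    Policy=json.dumps(SESSION_POLICY),
)["Credentials"]

# Per the first bullet, the temporary credentials are not region-bound, so
# they can be reused across regions instead of re-requested per region.
print(creds["AccessKeyId"], creds["Expiration"])
```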
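For the Datadog source change, a minimal configuration sketch. The variable name `DD_SOURCE` is an assumption inferred from the dd_source docs entry mentioned above; verify it against datadog.py before relying on it.

```python
import os

# DD_API_KEY / DD_SITE are the integration's standard settings; DD_SOURCE is
# the newly supported override (name assumed from the dd_source docs change).
os.environ["DD_API_KEY"] = "<your-datadog-api-key>"
os.environ["DD_SITE"] = "datadoghq.com"
os.environ["DD_SOURCE"] = "litellm-staging"

import litellm

# Ship success logs to Datadog, tagged with the configured source.
litellm.success_callback = ["datadog"]
```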
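And a quick request sketch for the newly exposed endpoint, assuming a proxy on localhost:4000 and an admin key; URL and key are placeholders.

```python
import requests

# Placeholder proxy URL and master key.
resp = requests.get(
    "http://localhost:4000/customer/list",
    headers={"Authorization": "Bearer sk-1234"},
    timeout=10,
)
resp.raise_for_status()

# Assumes the endpoint returns a JSON array of customer records.
for customer in resp.json():
    print(customer)
```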
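Finally, the `cache_control` change: message-level `cache_control` should now be honored even when `content` is a plain string rather than a list of content blocks. A sketch under that assumption (model name and prompts are placeholders):

```python
import litellm

response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",  # placeholder model name
    messages=[
        {
            "role": "system",
            # Plain-string content; prior to this change, cache_control was
            # only picked up on list-of-blocks content.
            "content": "You are a support bot. <long, reusable instructions>",
            "cache_control": {"type": "ephemeral"},
        },
        {"role": "user", "content": "How do I reset my password?"},
    ],
)
print(response.choices[0].message.content)
```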
This commit is contained in:
parent cdc312d51d
commit 1e7e538261
24 changed files with 383 additions and 247 deletions
litellm/router.py

@@ -47,6 +47,7 @@ from litellm._logging import verbose_router_logger
 from litellm.assistants.main import AssistantDeleted
 from litellm.caching import DualCache, InMemoryCache, RedisCache
 from litellm.integrations.custom_logger import CustomLogger
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.llms.azure import get_azure_ad_token_from_oidc
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
 from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
@@ -783,6 +784,10 @@ class Router:
                 }
             )
 
+            logging_obj: Optional[LiteLLMLogging] = kwargs.get(
+                "litellm_logging_obj", None
+            )
+
             rpm_semaphore = self._get_client(
                 deployment=deployment,
                 kwargs=kwargs,
@@ -797,11 +802,13 @@ class Router:
                 - If allowed, increment the rpm limit (allows global value to be updated, concurrency-safe)
                 """
                 await self.async_routing_strategy_pre_call_checks(
-                    deployment=deployment
+                    deployment=deployment, logging_obj=logging_obj
                 )
                 response = await _response
             else:
-                await self.async_routing_strategy_pre_call_checks(deployment=deployment)
+                await self.async_routing_strategy_pre_call_checks(
+                    deployment=deployment, logging_obj=logging_obj
+                )
                 response = await _response
 
             ## CHECK CONTENT FILTER ERROR ##
@@ -3860,7 +3867,9 @@ class Router:
             if isinstance(_callback, CustomLogger):
                 response = _callback.pre_call_check(deployment)
 
-    async def async_routing_strategy_pre_call_checks(self, deployment: dict):
+    async def async_routing_strategy_pre_call_checks(
+        self, deployment: dict, logging_obj: Optional[LiteLLMLogging] = None
+    ):
         """
         For usage-based-routing-v2, enables running rpm checks before the call is made, inside the semaphore.
 
@@ -3875,8 +3884,22 @@ class Router:
         for _callback in litellm.callbacks:
             if isinstance(_callback, CustomLogger):
                 try:
-                    response = await _callback.async_pre_call_check(deployment)
+                    _ = await _callback.async_pre_call_check(deployment)
                 except litellm.RateLimitError as e:
+                    ## LOG FAILURE EVENT
+                    if logging_obj is not None:
+                        asyncio.create_task(
+                            logging_obj.async_failure_handler(
+                                exception=e,
+                                traceback_exception=traceback.format_exc(),
+                                end_time=time.time(),
+                            )
+                        )
+                        ## LOGGING
+                        threading.Thread(
+                            target=logging_obj.failure_handler,
+                            args=(e, traceback.format_exc()),
+                        ).start()  # log response
                     self._set_cooldown_deployments(
                         exception_status=e.status_code,
                         original_exception=e,
@@ -3885,6 +3908,20 @@ class Router:
                     )
                     raise e
                 except Exception as e:
+                    ## LOG FAILURE EVENT
+                    if logging_obj is not None:
+                        asyncio.create_task(
+                            logging_obj.async_failure_handler(
+                                exception=e,
+                                traceback_exception=traceback.format_exc(),
+                                end_time=time.time(),
+                            )
+                        )
+                        ## LOGGING
+                        threading.Thread(
+                            target=logging_obj.failure_handler,
+                            args=(e, traceback.format_exc()),
+                        ).start()  # log response
                     raise e
 
     def _generate_model_id(self, model_group: str, litellm_params: dict):
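
The recurring block in the last two hunks (fire an async failure handler on the event loop, run the sync handler on a thread, then re-raise) can be reduced to a standalone sketch. `MiniLogger` and `pre_call_check` below are hypothetical stand-ins, not LiteLLM APIs; only the dispatch pattern mirrors the diff.

```python
import asyncio
import threading
import time
import traceback


class MiniLogger:
    """Hypothetical stand-in for LiteLLMLogging, with the two handlers the diff calls."""

    async def async_failure_handler(self, exception, traceback_exception, end_time):
        print(f"[async] {type(exception).__name__} at {end_time}")

    def failure_handler(self, exception, traceback_exception):
        print(f"[sync] {type(exception).__name__}")


async def pre_call_check(logging_obj=None):
    try:
        raise RuntimeError("rate limit hit")  # stand-in for a failing pre-call check
    except Exception as e:
        if logging_obj is not None:
            # Async handler: fire-and-forget on the running event loop.
            asyncio.create_task(
                logging_obj.async_failure_handler(
                    exception=e,
                    traceback_exception=traceback.format_exc(),
                    end_time=time.time(),
                )
            )
            # Sync handler: run on a thread so it cannot block the event loop.
            threading.Thread(
                target=logging_obj.failure_handler,
                args=(e, traceback.format_exc()),
            ).start()
        raise  # the caller still sees the original exception


async def main():
    try:
        await pre_call_check(MiniLogger())
    except RuntimeError:
        await asyncio.sleep(0.1)  # let the fire-and-forget task complete


asyncio.run(main())
```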