LiteLLM Minor fixes + improvements (08/04/2024) (#5505)

* Minor AWS IAM OIDC improvements (#5246)

* AWS IAM: Temporary tokens are valid across all regions after being issued, so it is wasteful to request one for each region.

* AWS IAM: Include an inline policy to help reduce misuse of overly permissive IAM roles.
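
Taken together, the two IAM changes above mean one scoped-down STS call can serve every region. A minimal boto3 sketch of the idea (the role ARN, inline policy, and token source are placeholders, not litellm's actual code):

```python
import json

import boto3

# Placeholder OIDC token; in practice this comes from your identity provider.
oidc_token = "<jwt-from-your-oidc-provider>"

# Inline session policy: caps what the temporary credentials can do, even if
# the underlying IAM role is overly permissive.
session_policy = json.dumps(
    {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": [
                    "bedrock:InvokeModel",
                    "bedrock:InvokeModelWithResponseStream",
                ],
                "Resource": "*",
            }
        ],
    }
)

# One STS call; assume_role_with_web_identity needs no AWS credentials itself.
creds = boto3.client("sts").assume_role_with_web_identity(
    RoleArn="arn:aws:iam::123456789012:role/litellm-bedrock",  # placeholder
    RoleSessionName="litellm",
    WebIdentityToken=oidc_token,
    Policy=session_policy,  # inline policy rides along with the session
)["Credentials"]

# The same temporary credentials are valid in every region, so there is no
# need to repeat the STS round-trip per region.
for region in ("us-east-1", "eu-central-1"):
    client = boto3.client(
        "bedrock-runtime",
        region_name=region,
        aws_access_key_id=creds["AccessKeyId"],
        aws_secret_access_key=creds["SecretAccessKey"],
        aws_session_token=creds["SessionToken"],
    )
```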

* (test_bedrock_completion.py): Ensure we are testing the cross-region AWS OIDC flow.

* fix(router.py): log rejected requests

Fixes https://github.com/BerriAI/litellm/issues/5498
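
As a rough illustration of what the router change further below enables (the custom logger shown is a hypothetical example, not code from the PR), requests rejected by pre-call checks now reach failure callbacks instead of failing silently:

```python
import litellm
from litellm.integrations.custom_logger import CustomLogger


class RejectionLogger(CustomLogger):  # hypothetical example logger
    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        # With this fix, requests rejected by pre-call rate-limit checks
        # fire this hook too, so they show up in logging/alerting.
        print("request failed/rejected:", kwargs.get("exception"))


litellm.callbacks = [RejectionLogger()]
```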

* refactor: don't use verbose_logger.exception if the exception is re-raised

The user might already have handling for this, but alerting systems in prod will surface it as an unhandled error.
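
In other words, the pattern being removed looks roughly like this (the call site is a generic stand-in, not one of the actual changed lines):

```python
from litellm._logging import verbose_logger


def fetch(make_call):
    # Anti-pattern: logging at exception level *and* re-raising means prod
    # alerting fires on the log line even when the caller handles the error.
    try:
        return make_call()
    except Exception as e:
        verbose_logger.exception("call failed")  # noisy: looks unhandled
        raise e


def fetch_quietly(make_call):
    # Preferred: re-raise and let the caller decide; log at debug if needed.
    try:
        return make_call()
    except Exception as e:
        verbose_logger.debug("call failed: %s", str(e))
        raise e
```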

* fix(datadog.py): support setting datadog source as an env var

Fixes https://github.com/BerriAI/litellm/issues/5508
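
Going by the docs change below, the source tag is now configurable via an environment variable. A hedged sketch of the intended usage (the exact variable name is an assumption based on the `dd_source` docs entry):

```python
import os

# Assumption: litellm's datadog logger reads the source tag from DD_SOURCE,
# falling back to its previous hard-coded value when unset.
os.environ["DD_API_KEY"] = "<your-datadog-api-key>"
os.environ["DD_SOURCE"] = "litellm-staging"

import litellm

litellm.success_callback = ["datadog"]  # datadog log events carry the source
```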

* docs(logging.md): add dd_source to datadog docs

* fix(proxy_server.py): expose `/customer/list` endpoint for showing all customers
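
A quick smoke test against a locally running proxy (the base URL and master key below are placeholders):

```python
import requests

# Placeholder URL/key; the endpoint requires a proxy admin key in practice.
resp = requests.get(
    "http://0.0.0.0:4000/customer/list",
    headers={"Authorization": "Bearer sk-1234"},
)
resp.raise_for_status()
print(resp.json())  # one entry per customer/end-user known to the proxy
```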

* (bedrock): Fix usage with Cloudflare AI Gateway, and proxies in general. (#5509)

* feat(anthropic.py): support 'cache_control' param for content when it is a string
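
Sketch of the shape this enables (model and messages are illustrative, not a test from the PR): `cache_control` can now accompany a plain-string `content` instead of requiring the list-of-content-blocks form.

```python
import litellm

response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[
        {
            "role": "system",
            # Plain string content; previously cache_control required
            # list-style content blocks.
            "content": "Long, reusable system context goes here...",
            "cache_control": {"type": "ephemeral"},
        },
        {"role": "user", "content": "Answer using the cached context."},
    ],
)
```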

* Revert "(bedrock): Fix usage with Cloudflare AI Gateway, and proxies in gener…" (#5519)

This reverts commit 3fac0349c2.

* refactor: ci/cd run again

---------

Co-authored-by: David Manouchehri <david.manouchehri@ai.moda>
Krish Dholakia · 2024-09-04 22:16:55 -07:00 · committed by GitHub
parent cdc312d51d
commit 1e7e538261
24 changed files with 383 additions and 247 deletions

litellm/router.py:

@@ -47,6 +47,7 @@ from litellm._logging import verbose_router_logger
 from litellm.assistants.main import AssistantDeleted
 from litellm.caching import DualCache, InMemoryCache, RedisCache
 from litellm.integrations.custom_logger import CustomLogger
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.llms.azure import get_azure_ad_token_from_oidc
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
 from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
@@ -783,6 +784,10 @@ class Router:
                 }
             )
+            logging_obj: Optional[LiteLLMLogging] = kwargs.get(
+                "litellm_logging_obj", None
+            )
             rpm_semaphore = self._get_client(
                 deployment=deployment,
                 kwargs=kwargs,
@@ -797,11 +802,13 @@ class Router:
                     - If allowed, increment the rpm limit (allows global value to be updated, concurrency-safe)
                     """
                     await self.async_routing_strategy_pre_call_checks(
-                        deployment=deployment
+                        deployment=deployment, logging_obj=logging_obj
                     )
                     response = await _response
             else:
-                await self.async_routing_strategy_pre_call_checks(deployment=deployment)
+                await self.async_routing_strategy_pre_call_checks(
+                    deployment=deployment, logging_obj=logging_obj
+                )
                 response = await _response
 
             ## CHECK CONTENT FILTER ERROR ##
@@ -3860,7 +3867,9 @@ class Router:
             if isinstance(_callback, CustomLogger):
                 response = _callback.pre_call_check(deployment)
 
-    async def async_routing_strategy_pre_call_checks(self, deployment: dict):
+    async def async_routing_strategy_pre_call_checks(
+        self, deployment: dict, logging_obj: Optional[LiteLLMLogging] = None
+    ):
         """
         For usage-based-routing-v2, enables running rpm checks before the call is made, inside the semaphore.
 
@@ -3875,8 +3884,22 @@ class Router:
         for _callback in litellm.callbacks:
             if isinstance(_callback, CustomLogger):
                 try:
-                    response = await _callback.async_pre_call_check(deployment)
+                    _ = await _callback.async_pre_call_check(deployment)
                 except litellm.RateLimitError as e:
+                    ## LOG FAILURE EVENT
+                    if logging_obj is not None:
+                        asyncio.create_task(
+                            logging_obj.async_failure_handler(
+                                exception=e,
+                                traceback_exception=traceback.format_exc(),
+                                end_time=time.time(),
+                            )
+                        )
+                        ## LOGGING
+                        threading.Thread(
+                            target=logging_obj.failure_handler,
+                            args=(e, traceback.format_exc()),
+                        ).start()  # log response
                     self._set_cooldown_deployments(
                         exception_status=e.status_code,
                         original_exception=e,
@@ -3885,6 +3908,20 @@ class Router:
                     )
                     raise e
                 except Exception as e:
+                    ## LOG FAILURE EVENT
+                    if logging_obj is not None:
+                        asyncio.create_task(
+                            logging_obj.async_failure_handler(
+                                exception=e,
+                                traceback_exception=traceback.format_exc(),
+                                end_time=time.time(),
+                            )
+                        )
+                        ## LOGGING
+                        threading.Thread(
+                            target=logging_obj.failure_handler,
+                            args=(e, traceback.format_exc()),
+                        ).start()  # log response
                     raise e
 
     def _generate_model_id(self, model_group: str, litellm_params: dict):