build(pyproject.toml): add new dev dependencies - for type checking (#9631)

* build(pyproject.toml): add new dev dependencies - for type checking

* build: reformat files to fit black

* ci: reformat to fit black

* ci(test-litellm.yml): make test runs clearer

* build(pyproject.toml): add ruff

* fix: fix ruff checks

* build(mypy/): fix mypy linting errors

* fix(hashicorp_secret_manager.py): fix passing cert for tls auth (see the TLS sketch after this list)

* build(mypy/): resolve all mypy errors

* test: update test

* fix: fix black formatting

* build(pre-commit-config.yaml): use poetry run black

* fix(proxy_server.py): fix linting error

* fix: fix ruff safe representation error
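
For the TLS auth item above: a minimal sketch of what passing a client certificate for Vault's TLS cert auth method typically looks like with httpx. The file paths, Vault URL, and the "cert" auth mount are placeholders, not litellm's actual configuration, and the real change in hashicorp_secret_manager.py may differ in detail.

import httpx

# Hypothetical cert paths -- placeholders, not litellm's actual config.
client = httpx.Client(
    cert=("/etc/certs/client.pem", "/etc/certs/client.key"),  # client cert + key for mutual TLS
    verify="/etc/certs/ca.pem",  # CA bundle used to verify the Vault server
)

# Vault's TLS certificate auth method authenticates the client via the
# certificate presented during the TLS handshake.
response = client.post("https://vault.example.com:8200/v1/auth/cert/login")
response.raise_for_status()
vault_token = response.json()["auth"]["client_token"]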
Krish Dholakia authored 2025-03-29 11:02:13 -07:00, committed by GitHub
parent 95e5dfae5a
commit 9b7ebb6a7d
214 changed files with 1553 additions and 1433 deletions

@@ -22,7 +22,7 @@ if TYPE_CHECKING:
     from litellm.proxy.utils import InternalUsageCache as _InternalUsageCache
 
-    Span = _Span
+    Span = Union[_Span, Any]
     InternalUsageCache = _InternalUsageCache
 else:
     Span = Any
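
For readers unfamiliar with this pattern: the TYPE_CHECKING block gives mypy a real type without adding a hard runtime import. A minimal self-contained sketch, using opentelemetry's Span as the stand-in type (the actual import in this file may differ); the Union with Any lets annotated call sites also accept the looser runtime fallback.

from typing import TYPE_CHECKING, Any, Union

if TYPE_CHECKING:
    # Only imported by the type checker; no runtime dependency.
    from opentelemetry.trace import Span as _Span

    # Union[_Span, Any] keeps mypy from flagging a mismatch against the
    # runtime fallback below, where the alias is plain Any.
    Span = Union[_Span, Any]
else:
    # At runtime the alias degrades gracefully to Any.
    Span = Any

def attach_span(span: Span) -> None:
    """Accepts a real Span under mypy, anything at runtime."""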
@@ -201,7 +201,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
         if rpm_limit is None:
             rpm_limit = sys.maxsize
 
-        values_to_update_in_cache: List[Tuple[Any, Any]] = (
+        values_to_update_in_cache: List[
+            Tuple[Any, Any]
+        ] = (
             []
         )  # values that need to get updated in cache, will run a batch_set_cache after this function
@@ -678,9 +680,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
         try:
             self.print_verbose("Inside Max Parallel Request Failure Hook")
-            litellm_parent_otel_span: Union[Span, None] = (
-                _get_parent_otel_span_from_kwargs(kwargs=kwargs)
-            )
+            litellm_parent_otel_span: Union[
+                Span, None
+            ] = _get_parent_otel_span_from_kwargs(kwargs=kwargs)
             _metadata = kwargs["litellm_params"].get("metadata", {}) or {}
             global_max_parallel_requests = _metadata.get(
                 "global_max_parallel_requests", None
@@ -807,11 +809,11 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
         current_minute = datetime.now().strftime("%M")
         precise_minute = f"{current_date}-{current_hour}-{current_minute}"
         request_count_api_key = f"{api_key}::{precise_minute}::request_count"
-        current: Optional[CurrentItemRateLimit] = (
-            await self.internal_usage_cache.async_get_cache(
-                key=request_count_api_key,
-                litellm_parent_otel_span=user_api_key_dict.parent_otel_span,
-            )
+        current: Optional[
+            CurrentItemRateLimit
+        ] = await self.internal_usage_cache.async_get_cache(
+            key=request_count_api_key,
+            litellm_parent_otel_span=user_api_key_dict.parent_otel_span,
         )
 
         key_remaining_rpm_limit: Optional[int] = None
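
The cached CurrentItemRateLimit fetched here feeds the remaining-limit arithmetic a few lines later. A hypothetical, self-contained rendering of that math: the TypedDict field names follow the surrounding code, but the helper itself is illustrative, not litellm's implementation.

from typing import Optional, Tuple, TypedDict

class CurrentItemRateLimit(TypedDict):
    current_requests: int
    current_rpm: int
    current_tpm: int

def remaining_limits(
    current: Optional[CurrentItemRateLimit],
    rpm_limit: Optional[int],
    tpm_limit: Optional[int],
) -> Tuple[Optional[int], Optional[int]]:
    # A cache miss means nothing has been consumed in this minute window.
    used_rpm = current["current_rpm"] if current else 0
    used_tpm = current["current_tpm"] if current else 0
    remaining_rpm = max(rpm_limit - used_rpm, 0) if rpm_limit is not None else None
    remaining_tpm = max(tpm_limit - used_tpm, 0) if tpm_limit is not None else None
    return remaining_rpm, remaining_tpm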
@@ -843,15 +845,15 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
         _additional_headers = _hidden_params.get("additional_headers", {}) or {}
 
         if key_remaining_rpm_limit is not None:
-            _additional_headers["x-ratelimit-remaining-requests"] = (
-                key_remaining_rpm_limit
-            )
+            _additional_headers[
+                "x-ratelimit-remaining-requests"
+            ] = key_remaining_rpm_limit
         if key_rpm_limit is not None:
             _additional_headers["x-ratelimit-limit-requests"] = key_rpm_limit
         if key_remaining_tpm_limit is not None:
-            _additional_headers["x-ratelimit-remaining-tokens"] = (
-                key_remaining_tpm_limit
-            )
+            _additional_headers[
+                "x-ratelimit-remaining-tokens"
+            ] = key_remaining_tpm_limit
         if key_tpm_limit is not None:
             _additional_headers["x-ratelimit-limit-tokens"] = key_tpm_limit