build(pyproject.toml): add new dev dependencies - for type checking (#9631)

* build(pyproject.toml): add new dev dependencies - for type checking

* build: reformat files to fit black

* ci: reformat to fit black

* ci(test-litellm.yml): make test runs clearer

* build(pyproject.toml): add ruff

* fix: fix ruff checks

* build(mypy/): fix mypy linting errors

* fix(hashicorp_secret_manager.py): fix passing cert for tls auth (see the TLS sketch after this list)

* build(mypy/): resolve all mypy errors

* test: update test

* fix: fix black formatting

* build(pre-commit-config.yaml): use poetry run black

* fix(proxy_server.py): fix linting error

* fix: fix ruff safe representation error
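
For the TLS auth item above: a minimal sketch of what passing a client certificate for Vault's TLS cert auth method typically looks like with httpx. The file paths, Vault URL, and the "cert" auth mount are placeholders, not litellm's actual configuration, and the real change in hashicorp_secret_manager.py may differ in detail.

import httpx

# Hypothetical cert paths -- placeholders, not litellm's actual config.
client = httpx.Client(
    cert=("/etc/certs/client.pem", "/etc/certs/client.key"),  # client cert + key for mutual TLS
    verify="/etc/certs/ca.pem",  # CA bundle used to verify the Vault server
)

# Vault's TLS certificate auth method authenticates the client via the
# certificate presented during the TLS handshake.
response = client.post("https://vault.example.com:8200/v1/auth/cert/login")
response.raise_for_status()
vault_token = response.json()["auth"]["client_token"]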
Krish Dholakia authored 2025-03-29 11:02:13 -07:00, committed by GitHub
parent 95e5dfae5a
commit 9b7ebb6a7d
214 changed files with 1553 additions and 1433 deletions

@@ -22,7 +22,7 @@ if TYPE_CHECKING:
     from litellm.proxy.utils import InternalUsageCache as _InternalUsageCache
 
-    Span = _Span
+    Span = Union[_Span, Any]
     InternalUsageCache = _InternalUsageCache
 else:
     Span = Any
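
For readers unfamiliar with this pattern: the TYPE_CHECKING block gives mypy a real type without adding a hard runtime import. A minimal self-contained sketch, using opentelemetry's Span as the stand-in type (the actual import in this file may differ); the Union with Any lets annotated call sites also accept the looser runtime fallback.

from typing import TYPE_CHECKING, Any, Union

if TYPE_CHECKING:
    # Only imported by the type checker; no runtime dependency.
    from opentelemetry.trace import Span as _Span

    # Union[_Span, Any] keeps mypy from flagging a mismatch against the
    # runtime fallback below, where the alias is plain Any.
    Span = Union[_Span, Any]
else:
    # At runtime the alias degrades gracefully to Any.
    Span = Any

def attach_span(span: Span) -> None:
    """Accepts a real Span under mypy, anything at runtime."""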
@@ -201,7 +201,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
         if rpm_limit is None:
             rpm_limit = sys.maxsize
 
-        values_to_update_in_cache: List[Tuple[Any, Any]] = (
+        values_to_update_in_cache: List[
+            Tuple[Any, Any]
+        ] = (
             []
         )  # values that need to get updated in cache, will run a batch_set_cache after this function
@@ -678,9 +680,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
         try:
             self.print_verbose("Inside Max Parallel Request Failure Hook")
-            litellm_parent_otel_span: Union[Span, None] = (
-                _get_parent_otel_span_from_kwargs(kwargs=kwargs)
-            )
+            litellm_parent_otel_span: Union[
+                Span, None
+            ] = _get_parent_otel_span_from_kwargs(kwargs=kwargs)
             _metadata = kwargs["litellm_params"].get("metadata", {}) or {}
             global_max_parallel_requests = _metadata.get(
                 "global_max_parallel_requests", None
@@ -807,11 +809,11 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
         current_minute = datetime.now().strftime("%M")
         precise_minute = f"{current_date}-{current_hour}-{current_minute}"
         request_count_api_key = f"{api_key}::{precise_minute}::request_count"
-        current: Optional[CurrentItemRateLimit] = (
-            await self.internal_usage_cache.async_get_cache(
-                key=request_count_api_key,
-                litellm_parent_otel_span=user_api_key_dict.parent_otel_span,
-            )
+        current: Optional[
+            CurrentItemRateLimit
+        ] = await self.internal_usage_cache.async_get_cache(
+            key=request_count_api_key,
+            litellm_parent_otel_span=user_api_key_dict.parent_otel_span,
         )
 
         key_remaining_rpm_limit: Optional[int] = None
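
The cached CurrentItemRateLimit fetched here feeds the remaining-limit arithmetic a few lines later. A hypothetical, self-contained rendering of that math: the TypedDict field names follow the surrounding code, but the helper itself is illustrative, not litellm's implementation.

from typing import Optional, Tuple, TypedDict

class CurrentItemRateLimit(TypedDict):
    current_requests: int
    current_rpm: int
    current_tpm: int

def remaining_limits(
    current: Optional[CurrentItemRateLimit],
    rpm_limit: Optional[int],
    tpm_limit: Optional[int],
) -> Tuple[Optional[int], Optional[int]]:
    # A cache miss means nothing has been consumed in this minute window.
    used_rpm = current["current_rpm"] if current else 0
    used_tpm = current["current_tpm"] if current else 0
    remaining_rpm = max(rpm_limit - used_rpm, 0) if rpm_limit is not None else None
    remaining_tpm = max(tpm_limit - used_tpm, 0) if tpm_limit is not None else None
    return remaining_rpm, remaining_tpm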
@@ -843,15 +845,15 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
         _additional_headers = _hidden_params.get("additional_headers", {}) or {}
 
         if key_remaining_rpm_limit is not None:
-            _additional_headers["x-ratelimit-remaining-requests"] = (
-                key_remaining_rpm_limit
-            )
+            _additional_headers[
+                "x-ratelimit-remaining-requests"
+            ] = key_remaining_rpm_limit
         if key_rpm_limit is not None:
             _additional_headers["x-ratelimit-limit-requests"] = key_rpm_limit
         if key_remaining_tpm_limit is not None:
-            _additional_headers["x-ratelimit-remaining-tokens"] = (
-                key_remaining_tpm_limit
-            )
+            _additional_headers[
+                "x-ratelimit-remaining-tokens"
+            ] = key_remaining_tpm_limit
         if key_tpm_limit is not None:
             _additional_headers["x-ratelimit-limit-tokens"] = key_tpm_limit