Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 03:34:10 +00:00
build(pyproject.toml): add new dev dependencies - for type checking (#9631)
* build(pyproject.toml): add new dev dependencies - for type checking
* build: reformat files to fit black
* ci: reformat to fit black
* ci(test-litellm.yml): make tests run clear
* build(pyproject.toml): add ruff
* fix: fix ruff checks
* build(mypy/): fix mypy linting errors
* fix(hashicorp_secret_manager.py): fix passing cert for tls auth
* build(mypy/): resolve all mypy errors
* test: update test
* fix: fix black formatting
* build(pre-commit-config.yaml): use poetry run black
* fix(proxy_server.py): fix linting error
* fix: fix ruff safe representation error
parent 72198737f8
commit d7b294dd0a
214 changed files with 1553 additions and 1433 deletions
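The "type checking" thread running through these commit messages shows up in the diff below as explicit Optional annotations followed by is-not-None narrowing. A minimal, self-contained sketch of that pattern (the names here are invented for illustration, not taken from litellm):

from typing import Optional

def lookup_tpm_limit(model: str) -> Optional[int]:
    # Hypothetical lookup that can miss, mirroring a router call that may return None.
    limits = {"gpt-4": 100_000}
    return limits.get(model)

total_model_tpm: Optional[int] = None  # explicit annotation, as in the diff below
limit = lookup_tpm_limit("gpt-4")
if limit is not None:  # mypy narrows Optional[int] to int inside this branch
    total_model_tpm = limit
print(total_model_tpm)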
@@ -71,7 +71,6 @@ class DynamicRateLimiterCache:
 
 
-
 class _PROXY_DynamicRateLimitHandler(CustomLogger):
     # Class variables or attributes
     def __init__(self, internal_usage_cache: DualCache):
         self.internal_usage_cache = DynamicRateLimiterCache(cache=internal_usage_cache)
@@ -121,12 +120,13 @@ class _PROXY_DynamicRateLimitHandler(CustomLogger):
         active_projects = await self.internal_usage_cache.async_get_cache(
             model=model
         )
-        current_model_tpm, current_model_rpm = (
-            await self.llm_router.get_model_group_usage(model_group=model)
-        )
-        model_group_info: Optional[ModelGroupInfo] = (
-            self.llm_router.get_model_group_info(model_group=model)
-        )
+        (
+            current_model_tpm,
+            current_model_rpm,
+        ) = await self.llm_router.get_model_group_usage(model_group=model)
+        model_group_info: Optional[
+            ModelGroupInfo
+        ] = self.llm_router.get_model_group_info(model_group=model)
         total_model_tpm: Optional[int] = None
         total_model_rpm: Optional[int] = None
         if model_group_info is not None:
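The hunk above changes formatting only: the pinned black output splits the left-hand tuple across lines instead of wrapping the right-hand side in parentheses. Both spellings compile to the same assignment, as this small standalone check illustrates (get_usage is a made-up stand-in for llm_router.get_model_group_usage):

import asyncio

async def get_usage() -> tuple[int, int]:
    # Made-up stand-in for llm_router.get_model_group_usage.
    return 10, 20

# Old spelling: right-hand side wrapped in parentheses.
tpm, rpm = (
    asyncio.run(get_usage())
)

# New spelling: left-hand tuple exploded across lines.
(
    tpm2,
    rpm2,
) = asyncio.run(get_usage())

assert (tpm, rpm) == (tpm2, rpm2) == (10, 20)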
@@ -210,10 +210,14 @@ class _PROXY_DynamicRateLimitHandler(CustomLogger):
         key_priority: Optional[str] = user_api_key_dict.metadata.get(
             "priority", None
         )
-        available_tpm, available_rpm, model_tpm, model_rpm, active_projects = (
-            await self.check_available_usage(
-                model=data["model"], priority=key_priority
-            )
-        )
+        (
+            available_tpm,
+            available_rpm,
+            model_tpm,
+            model_rpm,
+            active_projects,
+        ) = await self.check_available_usage(
+            model=data["model"], priority=key_priority
+        )
         ### CHECK TPM ###
         if available_tpm is not None and available_tpm == 0:
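In the pre-call hunk above, check_available_usage returns a five-value tuple and the handler rejects the request once the token-per-minute budget reaches zero. A condensed sketch of that control flow, with a generic RuntimeError standing in for whatever HTTP error the proxy actually raises (an assumption on my part), and a stubbed check_available_usage:

import asyncio
from typing import Optional, Tuple

async def check_available_usage(
    model: str, priority: Optional[str] = None
) -> Tuple[Optional[int], Optional[int], Optional[int], Optional[int], Optional[int]]:
    # Hypothetical stub; the real method consults the usage cache and router.
    return 0, 5, 100_000, 50, 2

async def pre_call(data: dict, key_priority: Optional[str]) -> None:
    (
        available_tpm,
        available_rpm,
        model_tpm,
        model_rpm,
        active_projects,
    ) = await check_available_usage(model=data["model"], priority=key_priority)
    ### CHECK TPM ###
    if available_tpm is not None and available_tpm == 0:
        raise RuntimeError(f"No TPM available for model={data['model']}")

asyncio.run(pre_call({"model": "gpt-4"}, None))  # raises: available_tpm is 0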
@@ -267,21 +271,25 @@ class _PROXY_DynamicRateLimitHandler(CustomLogger):
             key_priority: Optional[str] = user_api_key_dict.metadata.get(
                 "priority", None
             )
-            available_tpm, available_rpm, model_tpm, model_rpm, active_projects = (
-                await self.check_available_usage(
-                    model=model_info["model_name"], priority=key_priority
-                )
-            )
-            response._hidden_params["additional_headers"] = (
-                {  # Add additional response headers - easier debugging
-                    "x-litellm-model_group": model_info["model_name"],
-                    "x-ratelimit-remaining-litellm-project-tokens": available_tpm,
-                    "x-ratelimit-remaining-litellm-project-requests": available_rpm,
-                    "x-ratelimit-remaining-model-tokens": model_tpm,
-                    "x-ratelimit-remaining-model-requests": model_rpm,
-                    "x-ratelimit-current-active-projects": active_projects,
-                }
-            )
+            (
+                available_tpm,
+                available_rpm,
+                model_tpm,
+                model_rpm,
+                active_projects,
+            ) = await self.check_available_usage(
+                model=model_info["model_name"], priority=key_priority
+            )
+            response._hidden_params[
+                "additional_headers"
+            ] = {  # Add additional response headers - easier debugging
+                "x-litellm-model_group": model_info["model_name"],
+                "x-ratelimit-remaining-litellm-project-tokens": available_tpm,
+                "x-ratelimit-remaining-litellm-project-requests": available_rpm,
+                "x-ratelimit-remaining-model-tokens": model_tpm,
+                "x-ratelimit-remaining-model-requests": model_rpm,
+                "x-ratelimit-current-active-projects": active_projects,
+            }
 
             return response
         return await super().async_post_call_success_hook(
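The post-call hunk attaches the remaining budgets to _hidden_params["additional_headers"], which the proxy can then expose as HTTP response headers. A hypothetical client-side read of those headers follows; the header names come from the diff, while the endpoint URL and key are placeholders:

import requests

resp = requests.post(
    "http://localhost:4000/chat/completions",  # placeholder proxy URL
    headers={"Authorization": "Bearer sk-1234"},  # placeholder key
    json={"model": "gpt-4", "messages": [{"role": "user", "content": "hi"}]},
)

# Header names match the dict keys set in the hunk above.
print(resp.headers.get("x-ratelimit-remaining-litellm-project-tokens"))
print(resp.headers.get("x-ratelimit-remaining-model-tokens"))
print(resp.headers.get("x-ratelimit-current-active-projects"))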