fix(router.py): fix UTC datetime handling for Python < 3.11

Krrish Dholakia 2024-04-10 17:55:24 -07:00
parent 2531701a2a
commit 37ac17aebd
3 changed files with 33 additions and 15 deletions
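Why this change was needed: datetime.UTC is an alias for datetime.timezone.utc that only exists on Python 3.11+, so the datetime.now(datetime_og.UTC) calls removed below raise AttributeError on older interpreters. A minimal reproduction, assuming Python 3.10 or earlier:

    import datetime as datetime_og
    from datetime import datetime

    # On Python <= 3.10 this raises:
    #   AttributeError: module 'datetime' has no attribute 'UTC'
    datetime.now(datetime_og.UTC)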

litellm/router.py

@@ -26,7 +26,7 @@ from litellm.llms.custom_httpx.azure_dall_e_2 import (
     CustomHTTPTransport,
     AsyncCustomHTTPTransport,
 )
-from litellm.utils import ModelResponse, CustomStreamWrapper
+from litellm.utils import ModelResponse, CustomStreamWrapper, get_utc_datetime
 import copy
 from litellm._logging import verbose_router_logger
 import logging
@@ -588,7 +588,7 @@ class Router:
         verbose_router_logger.debug(
             f"Inside _image_generation()- model: {model}; kwargs: {kwargs}"
         )
-        deployment = self.get_available_deployment(
+        deployment = await self.async_get_available_deployment(
             model=model,
             messages=[{"role": "user", "content": "prompt"}],
             specific_deployment=kwargs.pop("specific_deployment", None),
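The same synchronous-to-async swap appears in the next four hunks: inside these async code paths, awaiting async_get_available_deployment keeps deployment selection (which may consult the cache) from blocking the event loop. A simplified, hypothetical sketch of the pattern, not the real Router implementation:

    import asyncio

    class Router:
        def get_available_deployment(self, model):
            # Synchronous selection: any I/O here blocks the running event loop.
            return {"model": model}

        async def async_get_available_deployment(self, model):
            # Awaitable selection: other coroutines keep running while it waits.
            await asyncio.sleep(0)  # placeholder for async cache lookups
            return {"model": model}

        async def _aembedding(self, model):
            deployment = await self.async_get_available_deployment(model=model)
            return deployment

    print(asyncio.run(Router()._aembedding("gpt-3.5-turbo")))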
@@ -688,7 +688,7 @@ class Router:
         verbose_router_logger.debug(
             f"Inside _atranscription()- model: {model}; kwargs: {kwargs}"
         )
-        deployment = self.get_available_deployment(
+        deployment = await self.async_get_available_deployment(
             model=model,
             messages=[{"role": "user", "content": "prompt"}],
             specific_deployment=kwargs.pop("specific_deployment", None),
@@ -768,7 +768,7 @@ class Router:
         verbose_router_logger.debug(
             f"Inside _moderation()- model: {model}; kwargs: {kwargs}"
         )
-        deployment = self.get_available_deployment(
+        deployment = await self.async_get_available_deployment(
             model=model,
             input=input,
             specific_deployment=kwargs.pop("specific_deployment", None),
@@ -911,7 +911,7 @@ class Router:
         verbose_router_logger.debug(
             f"Inside _atext_completion()- model: {model}; kwargs: {kwargs}"
         )
-        deployment = self.get_available_deployment(
+        deployment = await self.async_get_available_deployment(
             model=model,
             messages=[{"role": "user", "content": prompt}],
             specific_deployment=kwargs.pop("specific_deployment", None),
@@ -1077,7 +1077,7 @@ class Router:
         verbose_router_logger.debug(
             f"Inside _aembedding()- model: {model}; kwargs: {kwargs}"
         )
-        deployment = self.get_available_deployment(
+        deployment = await self.async_get_available_deployment(
             model=model,
             input=input,
             specific_deployment=kwargs.pop("specific_deployment", None),
@@ -1605,7 +1605,8 @@ class Router:
         if deployment is None:
             return
 
-        current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
+        dt = get_utc_datetime()
+        current_minute = dt.strftime("%H-%M")
         # get current fails for deployment
         # update the number of failed calls
         # if it's > allowed fails
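A hedged sketch of the minute-bucket pattern this hunk relies on: keying failure/cooldown state by the current UTC minute makes the state expire naturally when the minute rolls over, with no explicit TTL bookkeeping. The dict below is a hypothetical stand-in for the router's DualCache:

    from datetime import datetime, timezone

    cache = {}  # stand-in for the router's cache

    def get_utc_datetime():  # simplified equivalent of the helper added in utils.py
        return datetime.now(timezone.utc)

    current_minute = get_utc_datetime().strftime("%H-%M")  # e.g. "17-55"
    cooldown_key = f"{current_minute}:cooldown_models"

    cooled = cache.get(cooldown_key, [])
    cooled.append("deployment-1")  # hypothetical deployment id
    cache[cooldown_key] = cooled
    # Next minute, strftime yields a new key, so this entry is simply never read again.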
@@ -1647,7 +1648,8 @@ class Router:
         """
         Async implementation of '_get_cooldown_deployments'
         """
-        current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
+        dt = get_utc_datetime()
+        current_minute = dt.strftime("%H-%M")
         # get the current cooldown list for that minute
         cooldown_key = f"{current_minute}:cooldown_models"
 
@@ -1663,7 +1665,8 @@ class Router:
         """
         Get the list of models being cooled down for this minute
         """
-        current_minute = datetime.now().strftime("%H-%M")
+        dt = get_utc_datetime()
+        current_minute = dt.strftime("%H-%M")
         # get the current cooldown list for that minute
         cooldown_key = f"{current_minute}:cooldown_models"
 
@@ -2336,7 +2339,8 @@ class Router:
         _rate_limit_error = False
 
         ## get model group RPM ##
-        current_minute = datetime.now().strftime("%H-%M")
+        dt = get_utc_datetime()
+        current_minute = dt.strftime("%H-%M")
         rpm_key = f"{model}:rpm:{current_minute}"
         model_group_cache = (
             self.cache.get_cache(key=rpm_key, local_only=True) or {}
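The rpm_key above follows the same convention: per-minute request counts for a model group live under "<model>:rpm:<HH-MM>". A hypothetical illustration, with the cached value's shape assumed (a map from deployment id to count) and the counts fabricated for the example:

    model = "gpt-3.5-turbo"
    current_minute = "17-55"  # from get_utc_datetime().strftime("%H-%M")
    rpm_key = f"{model}:rpm:{current_minute}"

    cache = {rpm_key: {"deployment-1": 58, "deployment-2": 3}}  # illustrative values
    model_group_cache = cache.get(rpm_key) or {}
    requests_this_minute = sum(model_group_cache.values())  # 61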

litellm/router_strategy/lowest_tpm_rpm_v2.py

@@ -12,7 +12,7 @@ from litellm import token_counter
 from litellm.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_router_logger
-from litellm.utils import print_verbose
+from litellm.utils import print_verbose, get_utc_datetime
 
 
 class LowestTPMLoggingHandler_v2(CustomLogger):
@@ -60,7 +60,8 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
         # ------------
         # Setup values
         # ------------
-        current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
+        dt = get_utc_datetime()
+        current_minute = dt.strftime("%H-%M")
         tpm_key = f"{model_group}:tpm:{current_minute}"
         rpm_key = f"{model_group}:rpm:{current_minute}"
 
@@ -110,7 +111,8 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
         # ------------
         # Setup values
         # ------------
-        current_minute = datetime.now(datetime_og.UTC).strftime(
-            "%H-%M"
-        )  # use the same timezone regardless of system clock
+        dt = get_utc_datetime()
+        current_minute = dt.strftime(
+            "%H-%M"
+        )  # use the same timezone regardless of system clock
 
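The comment kept in this hunk ("use the same timezone regardless of system clock") is the point of the whole change: every router instance must agree on the minute bucket. A short illustration of what goes wrong with local time:

    from datetime import datetime, timezone

    utc_minute = datetime.now(timezone.utc).strftime("%H-%M")
    local_minute = datetime.now().strftime("%H-%M")
    # On a host with a UTC offset these differ (e.g. "00-17" vs "17-17" in US/Pacific),
    # so keys built from local time would read and write different TPM/RPM counters
    # than peers running in other timezones.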
@@ -241,7 +243,8 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
             f"get_available_deployments - Usage Based. model_group: {model_group}, healthy_deployments: {healthy_deployments}"
         )
 
-        current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
+        dt = get_utc_datetime()
+        current_minute = dt.strftime("%H-%M")
         tpm_keys = []
         rpm_keys = []
         for m in healthy_deployments:
@@ -288,7 +291,8 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
             f"get_available_deployments - Usage Based. model_group: {model_group}, healthy_deployments: {healthy_deployments}"
         )
 
-        current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
+        dt = get_utc_datetime()
+        current_minute = dt.strftime("%H-%M")
         tpm_keys = []
         rpm_keys = []
         for m in healthy_deployments:
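Both get_available_deployments hunks fan the minute bucket out across deployments. A hedged sketch of the key construction the loop performs (the deployment dict shape is assumed from the surrounding code, not shown in this diff):

    current_minute = "17-55"
    healthy_deployments = [
        {"model_info": {"id": "deployment-1"}},  # assumed shape
        {"model_info": {"id": "deployment-2"}},
    ]
    tpm_keys = [f"{m['model_info']['id']}:tpm:{current_minute}" for m in healthy_deployments]
    rpm_keys = [f"{m['model_info']['id']}:rpm:{current_minute}" for m in healthy_deployments]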

litellm/utils.py

@@ -5908,6 +5908,16 @@ def get_api_key(llm_provider: str, dynamic_api_key: Optional[str]):
     return api_key
 
 
+def get_utc_datetime():
+    import datetime as dt
+    from datetime import datetime
+
+    if hasattr(dt, "UTC"):
+        return datetime.now(dt.UTC)  # type: ignore
+    else:
+        return datetime.utcnow()  # type: ignore
+
+
 def get_max_tokens(model: str):
     """
     Get the maximum number of output tokens allowed for a given model.
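Behavioral note on the new helper, assuming it is imported as litellm.utils.get_utc_datetime: on Python 3.11+ it returns a timezone-aware datetime (tzinfo is UTC), while the utcnow() fallback on older versions returns a naive one. Both format identically under strftime("%H-%M"), which is all the call sites above need:

    from litellm.utils import get_utc_datetime

    dt = get_utc_datetime()
    print(dt.strftime("%H-%M"))  # same minute bucket on every host, e.g. "00-55"
    # Python >= 3.11: dt.tzinfo is datetime.UTC (aware)
    # Python <  3.11: dt comes from datetime.utcnow() and is naive (tzinfo is None)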