diff --git a/litellm/router.py b/litellm/router.py index e343b71d7..ca28dcb07 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -26,7 +26,7 @@ from litellm.llms.custom_httpx.azure_dall_e_2 import ( CustomHTTPTransport, AsyncCustomHTTPTransport, ) -from litellm.utils import ModelResponse, CustomStreamWrapper +from litellm.utils import ModelResponse, CustomStreamWrapper, get_utc_datetime import copy from litellm._logging import verbose_router_logger import logging @@ -588,7 +588,7 @@ class Router: verbose_router_logger.debug( f"Inside _image_generation()- model: {model}; kwargs: {kwargs}" ) - deployment = self.get_available_deployment( + deployment = await self.async_get_available_deployment( model=model, messages=[{"role": "user", "content": "prompt"}], specific_deployment=kwargs.pop("specific_deployment", None), @@ -688,7 +688,7 @@ class Router: verbose_router_logger.debug( f"Inside _atranscription()- model: {model}; kwargs: {kwargs}" ) - deployment = self.get_available_deployment( + deployment = await self.async_get_available_deployment( model=model, messages=[{"role": "user", "content": "prompt"}], specific_deployment=kwargs.pop("specific_deployment", None), @@ -768,7 +768,7 @@ class Router: verbose_router_logger.debug( f"Inside _moderation()- model: {model}; kwargs: {kwargs}" ) - deployment = self.get_available_deployment( + deployment = await self.async_get_available_deployment( model=model, input=input, specific_deployment=kwargs.pop("specific_deployment", None), @@ -911,7 +911,7 @@ class Router: verbose_router_logger.debug( f"Inside _atext_completion()- model: {model}; kwargs: {kwargs}" ) - deployment = self.get_available_deployment( + deployment = await self.async_get_available_deployment( model=model, messages=[{"role": "user", "content": prompt}], specific_deployment=kwargs.pop("specific_deployment", None), @@ -1077,7 +1077,7 @@ class Router: verbose_router_logger.debug( f"Inside _aembedding()- model: {model}; kwargs: {kwargs}" ) - deployment = 
self.get_available_deployment( + deployment = await self.async_get_available_deployment( model=model, input=input, specific_deployment=kwargs.pop("specific_deployment", None), @@ -1605,7 +1605,8 @@ class Router: if deployment is None: return - current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M") + dt = get_utc_datetime() + current_minute = dt.strftime("%H-%M") # get current fails for deployment # update the number of failed calls # if it's > allowed fails @@ -1647,7 +1648,8 @@ class Router: """ Async implementation of '_get_cooldown_deployments' """ - current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M") + dt = get_utc_datetime() + current_minute = dt.strftime("%H-%M") # get the current cooldown list for that minute cooldown_key = f"{current_minute}:cooldown_models" @@ -1663,7 +1665,8 @@ class Router: """ Get the list of models being cooled down for this minute """ - current_minute = datetime.now().strftime("%H-%M") + dt = get_utc_datetime() + current_minute = dt.strftime("%H-%M") # get the current cooldown list for that minute cooldown_key = f"{current_minute}:cooldown_models" @@ -2336,7 +2339,8 @@ class Router: _rate_limit_error = False ## get model group RPM ## - current_minute = datetime.now().strftime("%H-%M") + dt = get_utc_datetime() + current_minute = dt.strftime("%H-%M") rpm_key = f"{model}:rpm:{current_minute}" model_group_cache = ( self.cache.get_cache(key=rpm_key, local_only=True) or {} diff --git a/litellm/router_strategy/lowest_tpm_rpm_v2.py b/litellm/router_strategy/lowest_tpm_rpm_v2.py index 8f9f57fd9..c5598c11e 100644 --- a/litellm/router_strategy/lowest_tpm_rpm_v2.py +++ b/litellm/router_strategy/lowest_tpm_rpm_v2.py @@ -12,7 +12,7 @@ from litellm import token_counter from litellm.caching import DualCache from litellm.integrations.custom_logger import CustomLogger from litellm._logging import verbose_router_logger -from litellm.utils import print_verbose +from litellm.utils import print_verbose, get_utc_datetime class 
def get_utc_datetime():
    """Return the current time as a timezone-aware UTC ``datetime``.

    Callers format the result with ``strftime("%H-%M")`` to build
    per-minute cache keys, so every process must agree on the timezone
    regardless of the host's local clock settings.

    Returns:
        datetime.datetime: the current instant with ``tzinfo`` set to UTC.
    """
    # Imported locally so the helper does not depend on (or clash with)
    # whichever ``datetime`` names the enclosing module has in scope.
    from datetime import datetime, timezone

    # ``timezone.utc`` exists on every supported Python 3 release, unlike
    # ``datetime.UTC`` (3.11+). Using it unconditionally avoids the
    # deprecated ``datetime.utcnow()``, which returns a *naive* datetime and
    # made the return type differ between interpreter versions.
    return datetime.now(timezone.utc)