forked from phoenix/litellm-mirror
fix(router.py): fix datetime object
This commit is contained in:
parent
2531701a2a
commit
37ac17aebd
3 changed files with 33 additions and 15 deletions
|
@ -26,7 +26,7 @@ from litellm.llms.custom_httpx.azure_dall_e_2 import (
|
||||||
CustomHTTPTransport,
|
CustomHTTPTransport,
|
||||||
AsyncCustomHTTPTransport,
|
AsyncCustomHTTPTransport,
|
||||||
)
|
)
|
||||||
from litellm.utils import ModelResponse, CustomStreamWrapper
|
from litellm.utils import ModelResponse, CustomStreamWrapper, get_utc_datetime
|
||||||
import copy
|
import copy
|
||||||
from litellm._logging import verbose_router_logger
|
from litellm._logging import verbose_router_logger
|
||||||
import logging
|
import logging
|
||||||
|
@ -588,7 +588,7 @@ class Router:
|
||||||
verbose_router_logger.debug(
|
verbose_router_logger.debug(
|
||||||
f"Inside _image_generation()- model: {model}; kwargs: {kwargs}"
|
f"Inside _image_generation()- model: {model}; kwargs: {kwargs}"
|
||||||
)
|
)
|
||||||
deployment = self.get_available_deployment(
|
deployment = await self.async_get_available_deployment(
|
||||||
model=model,
|
model=model,
|
||||||
messages=[{"role": "user", "content": "prompt"}],
|
messages=[{"role": "user", "content": "prompt"}],
|
||||||
specific_deployment=kwargs.pop("specific_deployment", None),
|
specific_deployment=kwargs.pop("specific_deployment", None),
|
||||||
|
@ -688,7 +688,7 @@ class Router:
|
||||||
verbose_router_logger.debug(
|
verbose_router_logger.debug(
|
||||||
f"Inside _atranscription()- model: {model}; kwargs: {kwargs}"
|
f"Inside _atranscription()- model: {model}; kwargs: {kwargs}"
|
||||||
)
|
)
|
||||||
deployment = self.get_available_deployment(
|
deployment = await self.async_get_available_deployment(
|
||||||
model=model,
|
model=model,
|
||||||
messages=[{"role": "user", "content": "prompt"}],
|
messages=[{"role": "user", "content": "prompt"}],
|
||||||
specific_deployment=kwargs.pop("specific_deployment", None),
|
specific_deployment=kwargs.pop("specific_deployment", None),
|
||||||
|
@ -768,7 +768,7 @@ class Router:
|
||||||
verbose_router_logger.debug(
|
verbose_router_logger.debug(
|
||||||
f"Inside _moderation()- model: {model}; kwargs: {kwargs}"
|
f"Inside _moderation()- model: {model}; kwargs: {kwargs}"
|
||||||
)
|
)
|
||||||
deployment = self.get_available_deployment(
|
deployment = await self.async_get_available_deployment(
|
||||||
model=model,
|
model=model,
|
||||||
input=input,
|
input=input,
|
||||||
specific_deployment=kwargs.pop("specific_deployment", None),
|
specific_deployment=kwargs.pop("specific_deployment", None),
|
||||||
|
@ -911,7 +911,7 @@ class Router:
|
||||||
verbose_router_logger.debug(
|
verbose_router_logger.debug(
|
||||||
f"Inside _atext_completion()- model: {model}; kwargs: {kwargs}"
|
f"Inside _atext_completion()- model: {model}; kwargs: {kwargs}"
|
||||||
)
|
)
|
||||||
deployment = self.get_available_deployment(
|
deployment = await self.async_get_available_deployment(
|
||||||
model=model,
|
model=model,
|
||||||
messages=[{"role": "user", "content": prompt}],
|
messages=[{"role": "user", "content": prompt}],
|
||||||
specific_deployment=kwargs.pop("specific_deployment", None),
|
specific_deployment=kwargs.pop("specific_deployment", None),
|
||||||
|
@ -1077,7 +1077,7 @@ class Router:
|
||||||
verbose_router_logger.debug(
|
verbose_router_logger.debug(
|
||||||
f"Inside _aembedding()- model: {model}; kwargs: {kwargs}"
|
f"Inside _aembedding()- model: {model}; kwargs: {kwargs}"
|
||||||
)
|
)
|
||||||
deployment = self.get_available_deployment(
|
deployment = await self.async_get_available_deployment(
|
||||||
model=model,
|
model=model,
|
||||||
input=input,
|
input=input,
|
||||||
specific_deployment=kwargs.pop("specific_deployment", None),
|
specific_deployment=kwargs.pop("specific_deployment", None),
|
||||||
|
@ -1605,7 +1605,8 @@ class Router:
|
||||||
if deployment is None:
|
if deployment is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
|
dt = get_utc_datetime()
|
||||||
|
current_minute = dt.strftime("%H-%M")
|
||||||
# get current fails for deployment
|
# get current fails for deployment
|
||||||
# update the number of failed calls
|
# update the number of failed calls
|
||||||
# if it's > allowed fails
|
# if it's > allowed fails
|
||||||
|
@ -1647,7 +1648,8 @@ class Router:
|
||||||
"""
|
"""
|
||||||
Async implementation of '_get_cooldown_deployments'
|
Async implementation of '_get_cooldown_deployments'
|
||||||
"""
|
"""
|
||||||
current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
|
dt = get_utc_datetime()
|
||||||
|
current_minute = dt.strftime("%H-%M")
|
||||||
# get the current cooldown list for that minute
|
# get the current cooldown list for that minute
|
||||||
cooldown_key = f"{current_minute}:cooldown_models"
|
cooldown_key = f"{current_minute}:cooldown_models"
|
||||||
|
|
||||||
|
@ -1663,7 +1665,8 @@ class Router:
|
||||||
"""
|
"""
|
||||||
Get the list of models being cooled down for this minute
|
Get the list of models being cooled down for this minute
|
||||||
"""
|
"""
|
||||||
current_minute = datetime.now().strftime("%H-%M")
|
dt = get_utc_datetime()
|
||||||
|
current_minute = dt.strftime("%H-%M")
|
||||||
# get the current cooldown list for that minute
|
# get the current cooldown list for that minute
|
||||||
cooldown_key = f"{current_minute}:cooldown_models"
|
cooldown_key = f"{current_minute}:cooldown_models"
|
||||||
|
|
||||||
|
@ -2336,7 +2339,8 @@ class Router:
|
||||||
_rate_limit_error = False
|
_rate_limit_error = False
|
||||||
|
|
||||||
## get model group RPM ##
|
## get model group RPM ##
|
||||||
current_minute = datetime.now().strftime("%H-%M")
|
dt = get_utc_datetime()
|
||||||
|
current_minute = dt.strftime("%H-%M")
|
||||||
rpm_key = f"{model}:rpm:{current_minute}"
|
rpm_key = f"{model}:rpm:{current_minute}"
|
||||||
model_group_cache = (
|
model_group_cache = (
|
||||||
self.cache.get_cache(key=rpm_key, local_only=True) or {}
|
self.cache.get_cache(key=rpm_key, local_only=True) or {}
|
||||||
|
|
|
@ -12,7 +12,7 @@ from litellm import token_counter
|
||||||
from litellm.caching import DualCache
|
from litellm.caching import DualCache
|
||||||
from litellm.integrations.custom_logger import CustomLogger
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
from litellm._logging import verbose_router_logger
|
from litellm._logging import verbose_router_logger
|
||||||
from litellm.utils import print_verbose
|
from litellm.utils import print_verbose, get_utc_datetime
|
||||||
|
|
||||||
|
|
||||||
class LowestTPMLoggingHandler_v2(CustomLogger):
|
class LowestTPMLoggingHandler_v2(CustomLogger):
|
||||||
|
@ -60,7 +60,8 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
|
||||||
# ------------
|
# ------------
|
||||||
# Setup values
|
# Setup values
|
||||||
# ------------
|
# ------------
|
||||||
current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
|
dt = get_utc_datetime()
|
||||||
|
current_minute = dt.strftime("%H-%M")
|
||||||
tpm_key = f"{model_group}:tpm:{current_minute}"
|
tpm_key = f"{model_group}:tpm:{current_minute}"
|
||||||
rpm_key = f"{model_group}:rpm:{current_minute}"
|
rpm_key = f"{model_group}:rpm:{current_minute}"
|
||||||
|
|
||||||
|
@ -110,7 +111,8 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
|
||||||
# ------------
|
# ------------
|
||||||
# Setup values
|
# Setup values
|
||||||
# ------------
|
# ------------
|
||||||
current_minute = datetime.now(datetime_og.UTC).strftime(
|
dt = get_utc_datetime()
|
||||||
|
current_minute = dt.strftime(
|
||||||
"%H-%M"
|
"%H-%M"
|
||||||
) # use the same timezone regardless of system clock
|
) # use the same timezone regardless of system clock
|
||||||
|
|
||||||
|
@ -241,7 +243,8 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
|
||||||
f"get_available_deployments - Usage Based. model_group: {model_group}, healthy_deployments: {healthy_deployments}"
|
f"get_available_deployments - Usage Based. model_group: {model_group}, healthy_deployments: {healthy_deployments}"
|
||||||
)
|
)
|
||||||
|
|
||||||
current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
|
dt = get_utc_datetime()
|
||||||
|
current_minute = dt.strftime("%H-%M")
|
||||||
tpm_keys = []
|
tpm_keys = []
|
||||||
rpm_keys = []
|
rpm_keys = []
|
||||||
for m in healthy_deployments:
|
for m in healthy_deployments:
|
||||||
|
@ -288,7 +291,8 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
|
||||||
f"get_available_deployments - Usage Based. model_group: {model_group}, healthy_deployments: {healthy_deployments}"
|
f"get_available_deployments - Usage Based. model_group: {model_group}, healthy_deployments: {healthy_deployments}"
|
||||||
)
|
)
|
||||||
|
|
||||||
current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
|
dt = get_utc_datetime()
|
||||||
|
current_minute = dt.strftime("%H-%M")
|
||||||
tpm_keys = []
|
tpm_keys = []
|
||||||
rpm_keys = []
|
rpm_keys = []
|
||||||
for m in healthy_deployments:
|
for m in healthy_deployments:
|
||||||
|
|
|
@ -5908,6 +5908,16 @@ def get_api_key(llm_provider: str, dynamic_api_key: Optional[str]):
|
||||||
return api_key
|
return api_key
|
||||||
|
|
||||||
|
|
||||||
|
def get_utc_datetime():
    """Return the current time as a timezone-aware UTC ``datetime``.

    ``datetime.UTC`` only exists on Python 3.11+, and ``datetime.utcnow()``
    returns a *naive* value (and is deprecated since Python 3.12).
    ``timezone.utc`` is available on every Python 3 release and is the very
    object ``datetime.UTC`` aliases, so using it gives the same aware result
    on all interpreter versions instead of an aware value on new
    interpreters and a naive one on old ones.

    Returns:
        datetime: the current moment with ``tzinfo`` set to UTC.
    """
    # Imported locally so the helper does not depend on (or shadow) whatever
    # ``datetime`` name the surrounding module has bound at file level.
    from datetime import datetime, timezone

    return datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
def get_max_tokens(model: str):
|
def get_max_tokens(model: str):
|
||||||
"""
|
"""
|
||||||
Get the maximum number of output tokens allowed for a given model.
|
Get the maximum number of output tokens allowed for a given model.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue