fix(alerting.py): fix datetime comparison logic

Krrish Dholakia 2024-05-14 22:09:33 -07:00
parent 0bac40b0f2
commit 54587db402
4 changed files with 37 additions and 18 deletions


@@ -12,7 +12,7 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 import datetime
 from pydantic import BaseModel
 from enum import Enum
-from datetime import datetime as dt, timedelta
+from datetime import datetime as dt, timedelta, timezone
 from litellm.integrations.custom_logger import CustomLogger
 import random
@@ -32,7 +32,9 @@ class LiteLLMBase(BaseModel):
 class SlackAlertingArgs(LiteLLMBase):
     default_daily_report_frequency: int = 12 * 60 * 60  # 12 hours
-    daily_report_frequency: int = int(os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency))
+    daily_report_frequency: int = int(
+        os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency)
+    )
     report_check_interval: int = 5 * 60  # 5 minutes
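
For context, this reflow keeps the existing behavior: the report frequency defaults to 12 hours but can be overridden through the SLACK_DAILY_REPORT_FREQUENCY environment variable. A minimal standalone sketch of that pattern (not litellm's actual class):

import os

# Defaults to 12 hours (in seconds); e.g. SLACK_DAILY_REPORT_FREQUENCY=3600
# would switch the report to hourly without a code change.
default_daily_report_frequency = 12 * 60 * 60
daily_report_frequency = int(
    os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency)
)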
@@ -373,8 +375,10 @@
             key=lambda i: replaced_failed_values[i],
             reverse=True,
         )[:5]
-        top_5_failed = [index for index in top_5_failed if replaced_failed_values[index] > 0]
+        top_5_failed = [
+            index for index in top_5_failed if replaced_failed_values[index] > 0
+        ]
         # find top 5 slowest
         # Replace None values with a placeholder value (-1 in this case)
         placeholder_value = 0
@@ -389,7 +393,9 @@
             key=lambda i: replaced_slowest_values[i],
             reverse=True,
         )[:5]
-        top_5_slowest = [index for index in top_5_slowest if replaced_slowest_values[index] > 0]
+        top_5_slowest = [
+            index for index in top_5_slowest if replaced_slowest_values[index] > 0
+        ]
         # format alert -> return the litellm model name + api base
         message = f"\n\nHere are today's key metrics 📈: \n\n"
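
Both reflowed comprehensions use the same idiom: sort indices by their metric, keep the first five, then drop entries still at the placeholder value so deployments with no data don't appear in the report. A toy illustration with hypothetical per-deployment failure counts:

failed_values = [0, 7, 3, 0, 12, 1]  # hypothetical data, placeholder is 0

# Indices of the five largest values, descending.
top_5_failed = sorted(
    range(len(failed_values)),
    key=lambda i: failed_values[i],
    reverse=True,
)[:5]
# Drop indices whose value never rose above the placeholder.
top_5_failed = [index for index in top_5_failed if failed_values[index] > 0]
print(top_5_failed)  # [4, 1, 2, 5]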
@@ -847,15 +853,19 @@ Model Info:
                 value=_current_time,
             )
         else:
-            # check if current time - interval >= time last sent
-            delta = current_time - timedelta(
-                seconds=self.alerting_args.daily_report_frequency
-            )
+            # Check if current time - interval >= time last sent
+            delta_naive = timedelta(seconds=self.alerting_args.daily_report_frequency)
             if isinstance(report_sent, str):
                 report_sent = dt.fromisoformat(report_sent)
-            if delta >= report_sent:
+            # Ensure report_sent is an aware datetime object
+            if report_sent.tzinfo is None:
+                report_sent = report_sent.replace(tzinfo=timezone.utc)
+            # Calculate delta as an aware datetime object with the same timezone as report_sent
+            delta = report_sent - delta_naive
+            if current_time >= delta:
                 # Sneak in the reporting logic here
                 await self.send_daily_reports(router=llm_router)
                 # Also, don't forget to update the report_sent time after sending the report!
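
This hunk is the fix the commit title refers to: dt.fromisoformat returns a naive datetime when the stored string carries no UTC offset, and Python raises a TypeError when a naive datetime is compared against an aware one such as dt.now(timezone.utc). A minimal reproduction, assuming UTC timestamps as the code above does:

from datetime import datetime as dt, timezone

current_time = dt.now(timezone.utc)                    # aware
report_sent = dt.fromisoformat("2024-05-14T22:09:33")  # naive: no offset in string

try:
    current_time >= report_sent
except TypeError as err:
    print(err)  # can't compare offset-naive and offset-aware datetimes

# The fix applied above: attach UTC before comparing.
if report_sent.tzinfo is None:
    report_sent = report_sent.replace(tzinfo=timezone.utc)
print(current_time >= report_sent)  # now a valid comparison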


@@ -59,9 +59,7 @@ from importlib import resources
 with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
     json_data = json.load(f)
 # Convert to str (if necessary)
-json_str = json.dumps(json_data)
-claude_tokenizer = Tokenizer.from_str(json_str)
-cohere_tokenizer = Tokenizer.from_pretrained("Xenova/c4ai-command-r-v01-tokenizer")
+claude_json_str = json.dumps(json_data)
 import importlib.metadata
 from ._logging import verbose_logger
 from .types.router import LiteLLM_Params
@@ -3856,12 +3854,15 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
 @lru_cache(maxsize=128)
 def _select_tokenizer(model: str):
-    global claude_tokenizer, cohere_tokenizer
     if model in litellm.cohere_models and "command-r" in model:
         # cohere
+        cohere_tokenizer = Tokenizer.from_pretrained(
+            "Xenova/c4ai-command-r-v01-tokenizer"
+        )
         return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
     # anthropic
     elif model in litellm.anthropic_models and "claude-3" not in model:
+        claude_tokenizer = Tokenizer.from_str(claude_json_str)
         return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
     # llama2
     elif "llama-2" in model.lower() or "replicate" in model.lower():


@@ -1,10 +1,16 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: gpt-3.5-turbo-end-user-test
     litellm_params:
       model: gpt-3.5-turbo
       region_name: "eu"
     model_info:
       id: "1"
+  - model_name: gpt-3.5-turbo-end-user-test
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
+      api_version: "2023-05-15"
+      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
   - model_name: gpt-3.5-turbo
     litellm_params:
       model: azure/chatgpt-v-2
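
The os.environ/ prefix on api_key is litellm's convention for pulling a secret from the environment when the config is loaded (see the docs link in the comment above). A hedged sketch of how such a prefix can be resolved, not litellm's actual implementation:

import os

def resolve_secret(value: str) -> str:
    # Values written as "os.environ/VAR_NAME" name an environment
    # variable; anything else is treated as a literal secret.
    prefix = "os.environ/"
    if value.startswith(prefix):
        return os.environ[value.removeprefix(prefix)]
    return value

api_key = resolve_secret("os.environ/AZURE_API_KEY")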


@@ -153,7 +153,9 @@ async def test_end_user_specific_region():
         )
     ## MAKE CALL ##
-    key_gen = await generate_key(session=session, i=0, models=["gpt-3.5-turbo"])
+    key_gen = await generate_key(
+        session=session, i=0, models=["gpt-3.5-turbo-end-user-test"]
+    )
     key = key_gen["key"]
@@ -162,7 +164,7 @@ async def test_end_user_specific_region():
     print("SENDING USER PARAM - {}".format(end_user_obj["user_id"]))
     result = await client.chat.completions.with_raw_response.create(
-        model="gpt-3.5-turbo",
+        model="gpt-3.5-turbo-end-user-test",
         messages=[{"role": "user", "content": "Hey!"}],
         user=end_user_obj["user_id"],
     )