fix(alerting.py): fix datetime comparison logic

Krrish Dholakia 2024-05-14 22:09:33 -07:00
parent 0bac40b0f2
commit 54587db402
4 changed files with 37 additions and 18 deletions

@@ -12,7 +12,7 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 import datetime
 from pydantic import BaseModel
 from enum import Enum
-from datetime import datetime as dt, timedelta
+from datetime import datetime as dt, timedelta, timezone
 from litellm.integrations.custom_logger import CustomLogger
 import random
@@ -32,7 +32,9 @@ class LiteLLMBase(BaseModel):
 class SlackAlertingArgs(LiteLLMBase):
     default_daily_report_frequency: int = 12 * 60 * 60  # 12 hours
-    daily_report_frequency: int = int(os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency))
+    daily_report_frequency: int = int(
+        os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency)
+    )
     report_check_interval: int = 5 * 60  # 5 minutes
@ -373,8 +375,10 @@ class SlackAlerting(CustomLogger):
key=lambda i: replaced_failed_values[i],
reverse=True,
)[:5]
top_5_failed = [index for index in top_5_failed if replaced_failed_values[index] > 0]
top_5_failed = [
index for index in top_5_failed if replaced_failed_values[index] > 0
]
# find top 5 slowest
# Replace None values with a placeholder value (-1 in this case)
placeholder_value = 0
@@ -389,7 +393,9 @@ class SlackAlerting(CustomLogger):
             key=lambda i: replaced_slowest_values[i],
             reverse=True,
         )[:5]
-        top_5_slowest = [index for index in top_5_slowest if replaced_slowest_values[index] > 0]
+        top_5_slowest = [
+            index for index in top_5_slowest if replaced_slowest_values[index] > 0
+        ]

         # format alert -> return the litellm model name + api base
         message = f"\n\nHere are today's key metrics 📈: \n\n"
@@ -847,15 +853,19 @@ Model Info:
                     value=_current_time,
                 )
             else:
-                # check if current time - interval >= time last sent
-                delta = current_time - timedelta(
-                    seconds=self.alerting_args.daily_report_frequency
-                )
+                # Check if current time - interval >= time last sent
+                delta_naive = timedelta(seconds=self.alerting_args.daily_report_frequency)
                 if isinstance(report_sent, str):
                     report_sent = dt.fromisoformat(report_sent)

-                if delta >= report_sent:
+                # Ensure report_sent is an aware datetime object
+                if report_sent.tzinfo is None:
+                    report_sent = report_sent.replace(tzinfo=timezone.utc)
+
+                # Calculate delta as an aware datetime object with the same timezone as report_sent
+                delta = report_sent - delta_naive
+
+                if current_time >= delta:
                     # Sneak in the reporting logic here
                     await self.send_daily_reports(router=llm_router)
                     # Also, don't forget to update the report_sent time after sending the report!
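
The core of the fix: dt.fromisoformat() returns a naive datetime when the stored timestamp carries no UTC offset, and Python refuses to order naive against aware datetimes, so the old comparison could raise a TypeError at runtime. A minimal standalone sketch of the failure and the normalization applied above (timestamp values are illustrative):

    from datetime import datetime as dt, timedelta, timezone

    report_sent = dt.fromisoformat("2024-05-14T10:00:00")  # naive: string carries no offset
    current_time = dt.now(timezone.utc)                    # aware

    try:
        current_time >= report_sent
    except TypeError as err:
        print(err)  # can't compare offset-naive and offset-aware datetimes

    # The fix mirrors the diff: attach UTC to the naive value before comparing.
    if report_sent.tzinfo is None:
        report_sent = report_sent.replace(tzinfo=timezone.utc)

    delta = report_sent - timedelta(seconds=12 * 60 * 60)  # daily_report_frequency
    print(current_time >= delta)  # aware vs. aware: comparison now succeeds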

@@ -59,9 +59,7 @@ from importlib import resources
 with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
     json_data = json.load(f)
 # Convert to str (if necessary)
-json_str = json.dumps(json_data)
-claude_tokenizer = Tokenizer.from_str(json_str)
-cohere_tokenizer = Tokenizer.from_pretrained("Xenova/c4ai-command-r-v01-tokenizer")
+claude_json_str = json.dumps(json_data)
 import importlib.metadata
 from ._logging import verbose_logger
 from .types.router import LiteLLM_Params
@@ -3856,12 +3854,15 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
 @lru_cache(maxsize=128)
 def _select_tokenizer(model: str):
+    global claude_tokenizer, cohere_tokenizer
     if model in litellm.cohere_models and "command-r" in model:
         # cohere
+        cohere_tokenizer = Tokenizer.from_pretrained(
+            "Xenova/c4ai-command-r-v01-tokenizer"
+        )
         return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
     # anthropic
     elif model in litellm.anthropic_models and "claude-3" not in model:
+        claude_tokenizer = Tokenizer.from_str(claude_json_str)
         return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
     # llama2
     elif "llama-2" in model.lower() or "replicate" in model.lower():

@@ -1,10 +1,16 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: gpt-3.5-turbo-end-user-test
     litellm_params:
       model: gpt-3.5-turbo
       region_name: "eu"
     model_info:
       id: "1"
+  - model_name: gpt-3.5-turbo-end-user-test
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
+      api_version: "2023-05-15"
+      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
   - model_name: gpt-3.5-turbo
     litellm_params:
       model: azure/chatgpt-v-2
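
The two new entries share the model_name alias gpt-3.5-turbo-end-user-test: one OpenAI deployment tagged region_name "eu" and one Azure deployment with no region tag, so the router must pick between otherwise interchangeable deployments based on the end user's region. A sketch of calling the proxy through that alias, assuming it was started with this config (e.g. litellm --config proxy_config.yaml) and listens on the default http://0.0.0.0:4000:

    import openai

    # "sk-1234" stands in for a real proxy key; key and URL are assumptions.
    client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

    response = client.chat.completions.create(
        model="gpt-3.5-turbo-end-user-test",  # the shared alias from the config above
        messages=[{"role": "user", "content": "Hey!"}],
        user="eu-pinned-end-user",  # hypothetical end-user id with an EU region constraint
    )
    print(response.choices[0].message.content)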

@@ -153,7 +153,9 @@ async def test_end_user_specific_region():
         )

         ## MAKE CALL ##
-        key_gen = await generate_key(session=session, i=0, models=["gpt-3.5-turbo"])
+        key_gen = await generate_key(
+            session=session, i=0, models=["gpt-3.5-turbo-end-user-test"]
+        )

         key = key_gen["key"]
@@ -162,7 +164,7 @@ async def test_end_user_specific_region():
         print("SENDING USER PARAM - {}".format(end_user_obj["user_id"]))
         result = await client.chat.completions.with_raw_response.create(
-            model="gpt-3.5-turbo",
+            model="gpt-3.5-turbo-end-user-test",
             messages=[{"role": "user", "content": "Hey!"}],
             user=end_user_obj["user_id"],
         )
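
The test goes through with_raw_response (openai-python v1) rather than a plain create call so the HTTP response headers stay accessible next to the parsed body, which is presumably how it checks which deployment served the request. Continuing from the result object above, a sketch (the region header name is an assumption, not confirmed by this diff):

    completion = result.parse()  # the usual ChatCompletion object
    served_region = result.headers.get("x-litellm-model-region")  # assumed header name
    assert served_region == "eu", f"expected the EU deployment, got {served_region}"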