Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 03:04:13 +00:00

fix(alerting.py): fix datetime comparison logic

parent 0bac40b0f2
commit 54587db402

4 changed files with 37 additions and 18 deletions
@@ -12,7 +12,7 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 import datetime
 from pydantic import BaseModel
 from enum import Enum
-from datetime import datetime as dt, timedelta
+from datetime import datetime as dt, timedelta, timezone
 from litellm.integrations.custom_logger import CustomLogger
 import random
 
@@ -32,7 +32,9 @@ class LiteLLMBase(BaseModel):
 
 class SlackAlertingArgs(LiteLLMBase):
     default_daily_report_frequency: int = 12 * 60 * 60  # 12 hours
-    daily_report_frequency: int = int(os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency))
+    daily_report_frequency: int = int(
+        os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency)
+    )
     report_check_interval: int = 5 * 60  # 5 minutes
 
 
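Aside from the line wrap, note the pattern this hunk preserves: the report frequency is an env-var override with an in-code default, coerced to int because os.getenv returns a string when the variable is set. A minimal standalone sketch (the env var name comes from the diff, the rest is illustrative):

    import os

    DEFAULT_FREQUENCY = 12 * 60 * 60  # 12 hours, in seconds

    # os.getenv returns a str when the variable is set, so coerce to int;
    # the int default also passes through int() unchanged when it is unset.
    frequency = int(os.getenv("SLACK_DAILY_REPORT_FREQUENCY", DEFAULT_FREQUENCY))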
@@ -373,8 +375,10 @@ class SlackAlerting(CustomLogger):
             key=lambda i: replaced_failed_values[i],
             reverse=True,
         )[:5]
-        top_5_failed = [index for index in top_5_failed if replaced_failed_values[index] > 0]
+        top_5_failed = [
+            index for index in top_5_failed if replaced_failed_values[index] > 0
+        ]
 
         # find top 5 slowest
         # Replace None values with a placeholder value (-1 in this case)
         placeholder_value = 0
@@ -389,7 +393,9 @@ class SlackAlerting(CustomLogger):
             key=lambda i: replaced_slowest_values[i],
             reverse=True,
         )[:5]
-        top_5_slowest = [index for index in top_5_slowest if replaced_slowest_values[index] > 0]
+        top_5_slowest = [
+            index for index in top_5_slowest if replaced_slowest_values[index] > 0
+        ]
 
         # format alert -> return the litellm model name + api base
         message = f"\n\nHere are today's key metrics 📈: \n\n"
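These two hunks only re-wrap long list comprehensions; the selection logic is unchanged. For reference, the pattern sorts index positions by their value (largest first), keeps five, then drops non-positive entries. A standalone sketch with illustrative data:

    # Sketch of the top-5 selection pattern above; variable names and data
    # are illustrative, not taken from the file.
    values = [3.2, None, 0.0, 7.5, 1.1, 9.9]

    # Replace None with a placeholder so sorting is well-defined.
    placeholder_value = 0
    replaced_values = [v if v is not None else placeholder_value for v in values]

    # Sort index positions by their value, largest first, and keep five.
    top_5 = sorted(
        range(len(replaced_values)),
        key=lambda i: replaced_values[i],
        reverse=True,
    )[:5]

    # Drop indices whose value is zero or negative, as the diff does.
    top_5 = [i for i in top_5 if replaced_values[i] > 0]
    print(top_5)  # -> [5, 3, 0, 4]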
@@ -847,15 +853,19 @@ Model Info:
                 value=_current_time,
             )
         else:
-            # check if current time - interval >= time last sent
-            delta = current_time - timedelta(
-                seconds=self.alerting_args.daily_report_frequency
-            )
+            # Check if current time - interval >= time last sent
+            delta_naive = timedelta(seconds=self.alerting_args.daily_report_frequency)
 
             if isinstance(report_sent, str):
                 report_sent = dt.fromisoformat(report_sent)
 
-            if delta >= report_sent:
+            # Ensure report_sent is an aware datetime object
+            if report_sent.tzinfo is None:
+                report_sent = report_sent.replace(tzinfo=timezone.utc)
+
+            # Calculate delta as an aware datetime object with the same timezone as report_sent
+            delta = report_sent - delta_naive
+            if current_time >= delta:
                 # Sneak in the reporting logic here
                 await self.send_daily_reports(router=llm_router)
                 # Also, don't forget to update the report_sent time after sending the report!

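This hunk is the substantive fix: report_sent can come back from the cache as a naive ISO string while current_time is timezone-aware, and Python raises TypeError when comparing naive and aware datetimes. The new code normalizes report_sent to UTC before comparing. A minimal standalone sketch of the same comparison; names mirror the diff, and the cached timestamp is fabricated for illustration:

    from datetime import datetime as dt, timedelta, timezone

    current_time = dt.now(timezone.utc)
    report_sent = "2024-01-01T00:00:00"  # naive ISO string, e.g. from a cache
    daily_report_frequency = 12 * 60 * 60  # seconds

    if isinstance(report_sent, str):
        report_sent = dt.fromisoformat(report_sent)

    # Naive and aware datetimes cannot be compared directly, so attach UTC
    # when the cached timestamp carries no tzinfo.
    if report_sent.tzinfo is None:
        report_sent = report_sent.replace(tzinfo=timezone.utc)

    delta_naive = timedelta(seconds=daily_report_frequency)
    delta = report_sent - delta_naive  # aware, same tz as report_sent

    if current_time >= delta:
        print("send the daily report")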
@@ -59,9 +59,7 @@ from importlib import resources
 with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
     json_data = json.load(f)
 # Convert to str (if necessary)
-json_str = json.dumps(json_data)
-claude_tokenizer = Tokenizer.from_str(json_str)
-
-cohere_tokenizer = Tokenizer.from_pretrained("Xenova/c4ai-command-r-v01-tokenizer")
+claude_json_str = json.dumps(json_data)
 
 import importlib.metadata
 from ._logging import verbose_logger
 from .types.router import LiteLLM_Params
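This hunk drops the eager, import-time construction of claude_tokenizer and cohere_tokenizer; only the JSON string is kept at module level. As the next hunk shows, the Tokenizer objects are now built inside the memoized _select_tokenizer instead.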
@@ -3856,12 +3854,15 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
 
 @lru_cache(maxsize=128)
 def _select_tokenizer(model: str):
-    global claude_tokenizer, cohere_tokenizer
     if model in litellm.cohere_models and "command-r" in model:
         # cohere
+        cohere_tokenizer = Tokenizer.from_pretrained(
+            "Xenova/c4ai-command-r-v01-tokenizer"
+        )
         return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
     # anthropic
     elif model in litellm.anthropic_models and "claude-3" not in model:
+        claude_tokenizer = Tokenizer.from_str(claude_json_str)
         return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
     # llama2
     elif "llama-2" in model.lower() or "replicate" in model.lower():

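Because _select_tokenizer is wrapped in @lru_cache, moving construction inside it keeps the cost bounded: each tokenizer is built at most once per model string, and only when first requested, rather than unconditionally at import. A sketch of the pattern under assumed names; load_tokenizer is a stand-in for Tokenizer.from_pretrained / Tokenizer.from_str:

    from functools import lru_cache

    def load_tokenizer(name: str):
        print(f"building tokenizer for {name}")  # runs once per name
        return object()  # placeholder for an expensive Tokenizer object

    @lru_cache(maxsize=128)
    def select_tokenizer(model: str):
        # Build the expensive object lazily; lru_cache memoizes the result
        # per distinct `model` argument.
        if "command-r" in model:
            return {"type": "huggingface_tokenizer", "tokenizer": load_tokenizer(model)}
        return {"type": "openai_tokenizer", "tokenizer": None}

    select_tokenizer("command-r-plus")  # builds the tokenizer
    select_tokenizer("command-r-plus")  # served from the cache, no rebuild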
@@ -1,10 +1,16 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: gpt-3.5-turbo-end-user-test
     litellm_params:
       model: gpt-3.5-turbo
       region_name: "eu"
     model_info:
       id: "1"
+  - model_name: gpt-3.5-turbo-end-user-test
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
+      api_version: "2023-05-15"
+      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
   - model_name: gpt-3.5-turbo
     litellm_params:
       model: azure/chatgpt-v-2

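The renamed first entry plus the new Azure entry give the proxy two deployments under the shared alias gpt-3.5-turbo-end-user-test, one tagged region_name: "eu" and one without, presumably so the region-routing test below has a dedicated deployment group to exercise while the plain gpt-3.5-turbo alias stays available for other tests. As the inline comment notes, the os.environ/ prefix makes litellm read the API key from the environment.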
@@ -153,7 +153,9 @@ async def test_end_user_specific_region():
     )
 
     ## MAKE CALL ##
-    key_gen = await generate_key(session=session, i=0, models=["gpt-3.5-turbo"])
+    key_gen = await generate_key(
+        session=session, i=0, models=["gpt-3.5-turbo-end-user-test"]
+    )
 
     key = key_gen["key"]
 
@@ -162,7 +164,7 @@ async def test_end_user_specific_region():
 
     print("SENDING USER PARAM - {}".format(end_user_obj["user_id"]))
     result = await client.chat.completions.with_raw_response.create(
-        model="gpt-3.5-turbo",
+        model="gpt-3.5-turbo-end-user-test",
         messages=[{"role": "user", "content": "Hey!"}],
         user=end_user_obj["user_id"],
     )
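Both test hunks follow from the config change: the generated key is scoped to gpt-3.5-turbo-end-user-test and the completion call targets the same alias, so the request can only land on the two deployments defined for that alias above.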