forked from phoenix/litellm-mirror

fix(alerting.py): fix datetime comparison logic

parent 0bac40b0f2 · commit 54587db402
4 changed files with 37 additions and 18 deletions
@@ -12,7 +12,7 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 import datetime
 from pydantic import BaseModel
 from enum import Enum
-from datetime import datetime as dt, timedelta
+from datetime import datetime as dt, timedelta, timezone
 from litellm.integrations.custom_logger import CustomLogger
 import random
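The added timezone import matters because Python refuses to compare naive and aware datetimes, which is the failure mode this commit addresses. A minimal standalone sketch of that behavior (standard library only, values invented for illustration):

from datetime import datetime as dt, timedelta, timezone

naive = dt.utcnow()           # naive: tzinfo is None
aware = dt.now(timezone.utc)  # aware: tzinfo is timezone.utc

try:
    naive >= aware  # mixing naive and aware operands raises
except TypeError as err:
    print(err)  # can't compare offset-naive and offset-aware datetimes

# Normalizing the naive value first makes the comparison legal:
print(naive.replace(tzinfo=timezone.utc) >= aware - timedelta(hours=12))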
@@ -32,7 +32,9 @@ class LiteLLMBase(BaseModel):


 class SlackAlertingArgs(LiteLLMBase):
     default_daily_report_frequency: int = 12 * 60 * 60  # 12 hours
-    daily_report_frequency: int = int(os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency))
+    daily_report_frequency: int = int(
+        os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency)
+    )
     report_check_interval: int = 5 * 60  # 5 minutes
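daily_report_frequency reads SLACK_DAILY_REPORT_FREQUENCY with the 12-hour constant as fallback, and the hunk only rewraps that line. A sketch of the same pattern in isolation; the class name here is a hypothetical stand-in, not from the source:

import os
from pydantic import BaseModel

class ReportArgs(BaseModel):  # hypothetical stand-in for SlackAlertingArgs
    default_daily_report_frequency: int = 12 * 60 * 60  # 12 hours, in seconds
    daily_report_frequency: int = int(
        os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency)
    )
    report_check_interval: int = 5 * 60  # 5 minutes

print(ReportArgs().daily_report_frequency)  # 43200 unless the env var was set
os.environ["SLACK_DAILY_REPORT_FREQUENCY"] = "3600"
print(ReportArgs().daily_report_frequency)  # still 43200: read once, at class creation

One design consequence worth noting: the env var is evaluated when the class body executes, so setting it after import has no effect.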
@@ -373,8 +375,10 @@ class SlackAlerting(CustomLogger):
             key=lambda i: replaced_failed_values[i],
             reverse=True,
         )[:5]
-        top_5_failed = [index for index in top_5_failed if replaced_failed_values[index] > 0]
+        top_5_failed = [
+            index for index in top_5_failed if replaced_failed_values[index] > 0
+        ]

         # find top 5 slowest
         # Replace None values with a placeholder value (-1 in this case)
         placeholder_value = 0
@@ -389,7 +393,9 @@ class SlackAlerting(CustomLogger):
             key=lambda i: replaced_slowest_values[i],
             reverse=True,
         )[:5]
-        top_5_slowest = [index for index in top_5_slowest if replaced_slowest_values[index] > 0]
+        top_5_slowest = [
+            index for index in top_5_slowest if replaced_slowest_values[index] > 0
+        ]

         # format alert -> return the litellm model name + api base
         message = f"\n\nHere are today's key metrics 📈: \n\n"
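Both of these hunks only rewrap the same selection pattern: sort indices by value, keep the top five, then drop indices whose value is the zero placeholder. A self-contained sketch of that logic with invented sample data:

values = [12.0, 0, 3.5, 0, 7.1, 0.4]  # e.g. per-deployment metrics, 0 = no data

top_5 = sorted(
    range(len(values)),
    key=lambda i: values[i],
    reverse=True,
)[:5]
top_5 = [index for index in top_5 if values[index] > 0]

print(top_5)  # [0, 4, 2, 5] -> indices of nonzero values, largest first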
@@ -847,15 +853,19 @@ Model Info:
                 value=_current_time,
             )
         else:
-            # check if current time - interval >= time last sent
-            delta = current_time - timedelta(
-                seconds=self.alerting_args.daily_report_frequency
-            )
+            # Check if current time - interval >= time last sent
+            delta_naive = timedelta(seconds=self.alerting_args.daily_report_frequency)
             if isinstance(report_sent, str):
                 report_sent = dt.fromisoformat(report_sent)

-            if delta >= report_sent:
+            # Ensure report_sent is an aware datetime object
+            if report_sent.tzinfo is None:
+                report_sent = report_sent.replace(tzinfo=timezone.utc)
+
+            # Calculate delta as an aware datetime object with the same timezone as report_sent
+            delta = report_sent - delta_naive
+
+            if current_time >= delta:
                 # Sneak in the reporting logic here
                 await self.send_daily_reports(router=llm_router)
                 # Also, don't forget to update the report_sent time after sending the report!
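This is the heart of the fix: report_sent can come back from the cache as an ISO string, and as a naive datetime, while current_time is aware, so the old `delta >= report_sent` comparison could raise TypeError. A standalone sketch of the normalize-then-compare flow; the cached value here is hypothetical, and the final comparison expresses the elapsed-interval check directly rather than mirroring the hunk line for line:

from datetime import datetime as dt, timedelta, timezone

current_time = dt.now(timezone.utc)
report_sent = "2024-05-01T08:00:00"    # hypothetical cached value (ISO string, naive)
daily_report_frequency = 12 * 60 * 60  # seconds

delta_naive = timedelta(seconds=daily_report_frequency)
if isinstance(report_sent, str):
    report_sent = dt.fromisoformat(report_sent)

# Ensure report_sent is an aware datetime object before comparing
if report_sent.tzinfo is None:
    report_sent = report_sent.replace(tzinfo=timezone.utc)

# Both operands are aware now, so the comparison cannot raise TypeError
if current_time - report_sent >= delta_naive:
    print("interval elapsed: send the daily report")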
@@ -59,9 +59,7 @@ from importlib import resources
 with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
     json_data = json.load(f)
 # Convert to str (if necessary)
-json_str = json.dumps(json_data)
-claude_tokenizer = Tokenizer.from_str(json_str)
-cohere_tokenizer = Tokenizer.from_pretrained("Xenova/c4ai-command-r-v01-tokenizer")
+claude_json_str = json.dumps(json_data)
 import importlib.metadata
 from ._logging import verbose_logger
 from .types.router import LiteLLM_Params
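At import time the module now keeps only the serialized tokenizer JSON (claude_json_str); constructing the actual Tokenizer objects is deferred. A sketch of that load-now, build-later pattern; it assumes litellm and the tokenizers package are installed, and the helper name is hypothetical, not from the diff:

import json
from importlib import resources
from tokenizers import Tokenizer

# Read the bundled tokenizer definition once, but keep only its string form;
# building the Tokenizer itself is deferred until a matching model is used.
with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
    json_data = json.load(f)
claude_json_str = json.dumps(json_data)

def load_claude_tokenizer() -> Tokenizer:  # hypothetical helper, not in the diff
    return Tokenizer.from_str(claude_json_str)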
@@ -3856,12 +3854,15 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):

 @lru_cache(maxsize=128)
 def _select_tokenizer(model: str):
-    global claude_tokenizer, cohere_tokenizer
     if model in litellm.cohere_models and "command-r" in model:
         # cohere
+        cohere_tokenizer = Tokenizer.from_pretrained(
+            "Xenova/c4ai-command-r-v01-tokenizer"
+        )
         return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
     # anthropic
     elif model in litellm.anthropic_models and "claude-3" not in model:
+        claude_tokenizer = Tokenizer.from_str(claude_json_str)
         return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
     # llama2
     elif "llama-2" in model.lower() or "replicate" in model.lower():
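Because _select_tokenizer is wrapped in @lru_cache(maxsize=128), the tokenizer construction moved inside it still runs only once per distinct model string; later calls get the cached dict back. A stripped-down sketch of that caching behavior, with the expensive construction simulated by a counter:

from functools import lru_cache

CALLS = 0

@lru_cache(maxsize=128)
def _select_tokenizer(model: str) -> dict:
    global CALLS
    CALLS += 1  # stands in for the expensive Tokenizer construction
    if "command-r" in model:
        return {"type": "huggingface_tokenizer", "tokenizer": f"cohere:{model}"}
    return {"type": "openai_tokenizer", "tokenizer": "cl100k_base"}

_select_tokenizer("command-r-plus")
_select_tokenizer("command-r-plus")  # served from the cache
print(CALLS)  # 1

One consequence of the cache is that the returned dict (and the tokenizer inside it) is shared across all callers for a given model string.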
@@ -1,10 +1,16 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: gpt-3.5-turbo-end-user-test
     litellm_params:
       model: gpt-3.5-turbo
       region_name: "eu"
     model_info:
       id: "1"
+  - model_name: gpt-3.5-turbo-end-user-test
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
+      api_version: "2023-05-15"
+      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
   - model_name: gpt-3.5-turbo
     litellm_params:
       model: azure/chatgpt-v-2
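The two entries sharing the model_name gpt-3.5-turbo-end-user-test form one routing group, with region_name: "eu" marking the first deployment as region-restricted. A sketch of how a model_list like this maps onto litellm's Router when built programmatically; the API key is a placeholder, and whether region filtering applies to a given request depends on proxy-side end-user settings not shown in this diff:

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo-end-user-test",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "region_name": "eu",  # only eligible for EU-region traffic
            },
            "model_info": {"id": "1"},
        },
        {
            "model_name": "gpt-3.5-turbo-end-user-test",
            "litellm_params": {
                "model": "azure/chatgpt-v-2",
                "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                "api_version": "2023-05-15",
                "api_key": "sk-...",  # placeholder; the config reads os.environ/AZURE_API_KEY
            },
        },
    ]
)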
@@ -153,7 +153,9 @@ async def test_end_user_specific_region():
     )

     ## MAKE CALL ##
-    key_gen = await generate_key(session=session, i=0, models=["gpt-3.5-turbo"])
+    key_gen = await generate_key(
+        session=session, i=0, models=["gpt-3.5-turbo-end-user-test"]
+    )

     key = key_gen["key"]
@@ -162,7 +164,7 @@ async def test_end_user_specific_region():

     print("SENDING USER PARAM - {}".format(end_user_obj["user_id"]))
     result = await client.chat.completions.with_raw_response.create(
-        model="gpt-3.5-turbo",
+        model="gpt-3.5-turbo-end-user-test",
         messages=[{"role": "user", "content": "Hey!"}],
         user=end_user_obj["user_id"],
     )
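The test now targets the renamed model group and calls the OpenAI SDK's with_raw_response variant, which exposes response headers alongside the parsed completion. A self-contained sketch of that pattern against a local proxy; the base URL, key, user value, and the header name inspected are all assumptions for illustration, not taken from this diff:

import asyncio
from openai import AsyncOpenAI

async def check_region_header() -> None:
    # Assumes a litellm proxy on localhost:4000 and a key from generate_key()
    client = AsyncOpenAI(base_url="http://0.0.0.0:4000", api_key="sk-...")
    result = await client.chat.completions.with_raw_response.create(
        model="gpt-3.5-turbo-end-user-test",
        messages=[{"role": "user", "content": "Hey!"}],
        user="my-end-user",  # stands in for end_user_obj["user_id"]
    )
    completion = result.parse()  # the usual ChatCompletion object
    # Header name is an assumption about the proxy's response:
    print(completion.id, result.headers.get("x-litellm-model-api-base"))

asyncio.run(check_region_header())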