From 54587db402ee2029db355696c24ecc105721b793 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 14 May 2024 22:09:33 -0700
Subject: [PATCH] fix(alerting.py): fix datetime comparison logic

---
 litellm/integrations/slack_alerting.py | 32 +++++++++++++++++++++-----------
 litellm/utils.py                       |  9 +++++----
 proxy_server_config.yaml               |  8 +++++++-
 tests/test_end_users.py                |  6 ++++--
 4 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py
index a56e894ad..ec2740e30 100644
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@@ -12,7 +12,7 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 import datetime
 from pydantic import BaseModel
 from enum import Enum
-from datetime import datetime as dt, timedelta
+from datetime import datetime as dt, timedelta, timezone
 from litellm.integrations.custom_logger import CustomLogger
 import random
 
@@ -32,7 +32,9 @@ class LiteLLMBase(BaseModel):
 
 class SlackAlertingArgs(LiteLLMBase):
     default_daily_report_frequency: int = 12 * 60 * 60  # 12 hours
-    daily_report_frequency: int = int(os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency))
+    daily_report_frequency: int = int(
+        os.getenv("SLACK_DAILY_REPORT_FREQUENCY", default_daily_report_frequency)
+    )
     report_check_interval: int = 5 * 60  # 5 minutes
 
 
@@ -373,8 +375,10 @@
             key=lambda i: replaced_failed_values[i],
             reverse=True,
         )[:5]
-        top_5_failed = [index for index in top_5_failed if replaced_failed_values[index] > 0]
-
+        top_5_failed = [
+            index for index in top_5_failed if replaced_failed_values[index] > 0
+        ]
+
         # find top 5 slowest
         # Replace None values with a placeholder value (-1 in this case)
         placeholder_value = 0
@@ -389,7 +393,9 @@
             key=lambda i: replaced_slowest_values[i],
             reverse=True,
         )[:5]
-        top_5_slowest = [index for index in top_5_slowest if replaced_slowest_values[index] > 0]
+        top_5_slowest = [
+            index for index in top_5_slowest if replaced_slowest_values[index] > 0
+        ]
 
         # format alert -> return the litellm model name + api base
         message = f"\n\nHere are today's key metrics 📈: \n\n"
@@ -847,15 +853,19 @@ Model Info:
                 value=_current_time,
             )
         else:
-            # check if current time - interval >= time last sent
-            delta = current_time - timedelta(
-                seconds=self.alerting_args.daily_report_frequency
-            )
-
+            # Check if current time - interval >= time last sent
+            delta_naive = timedelta(seconds=self.alerting_args.daily_report_frequency)
             if isinstance(report_sent, str):
                 report_sent = dt.fromisoformat(report_sent)
 
-            if delta >= report_sent:
+            # Ensure report_sent is an aware datetime object
+            if report_sent.tzinfo is None:
+                report_sent = report_sent.replace(tzinfo=timezone.utc)
+
+            # Calculate the next report time as an aware datetime in the same timezone as report_sent
+            delta = report_sent + delta_naive
+
+            if current_time >= delta:
                 # Sneak in the reporting logic here
                 await self.send_daily_reports(router=llm_router)
                 # Also, don't forget to update the report_sent time after sending the report!
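Note on the slack_alerting.py change: dt.now(timezone.utc) produces a timezone-aware datetime, while dt.fromisoformat() returns a naive one whenever the cached string carries no UTC offset, and ordering comparisons between the two raise "TypeError: can't compare offset-naive and offset-aware datetimes". The hunk therefore normalizes report_sent to UTC before doing any arithmetic on it. A minimal standalone sketch of that pattern (the timestamp literal and the print are illustrative, not taken from the patch):

    from datetime import datetime, timedelta, timezone

    current_time = datetime.now(timezone.utc)  # aware
    report_sent = datetime.fromisoformat("2024-05-14T22:00:00")  # naive: string has no offset

    # Normalize the naive timestamp to UTC so both sides of the comparison are aware
    if report_sent.tzinfo is None:
        report_sent = report_sent.replace(tzinfo=timezone.utc)

    # The report is due once a full frequency window has passed since the last send
    frequency = timedelta(seconds=12 * 60 * 60)
    if current_time >= report_sent + frequency:
        print("send daily report")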
diff --git a/litellm/utils.py b/litellm/utils.py
index 00f39cc01..7df79f373 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -59,9 +59,7 @@ from importlib import resources
 with resources.open_text("litellm.llms.tokenizers", "anthropic_tokenizer.json") as f:
     json_data = json.load(f)
 # Convert to str (if necessary)
-json_str = json.dumps(json_data)
-claude_tokenizer = Tokenizer.from_str(json_str)
-cohere_tokenizer = Tokenizer.from_pretrained("Xenova/c4ai-command-r-v01-tokenizer")
+claude_json_str = json.dumps(json_data)
 import importlib.metadata
 from ._logging import verbose_logger
 from .types.router import LiteLLM_Params
@@ -3856,12 +3854,15 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
 
 @lru_cache(maxsize=128)
 def _select_tokenizer(model: str):
-    global claude_tokenizer, cohere_tokenizer
     if model in litellm.cohere_models and "command-r" in model:
         # cohere
+        cohere_tokenizer = Tokenizer.from_pretrained(
+            "Xenova/c4ai-command-r-v01-tokenizer"
+        )
         return {"type": "huggingface_tokenizer", "tokenizer": cohere_tokenizer}
     # anthropic
     elif model in litellm.anthropic_models and "claude-3" not in model:
+        claude_tokenizer = Tokenizer.from_str(claude_json_str)
         return {"type": "huggingface_tokenizer", "tokenizer": claude_tokenizer}
     # llama2
     elif "llama-2" in model.lower() or "replicate" in model.lower():
diff --git a/proxy_server_config.yaml b/proxy_server_config.yaml
index 9b0e7c9d0..10f0d4a75 100644
--- a/proxy_server_config.yaml
+++ b/proxy_server_config.yaml
@@ -1,10 +1,16 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: gpt-3.5-turbo-end-user-test
     litellm_params:
       model: gpt-3.5-turbo
       region_name: "eu"
     model_info:
       id: "1"
+  - model_name: gpt-3.5-turbo-end-user-test
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
+      api_version: "2023-05-15"
+      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
   - model_name: gpt-3.5-turbo
     litellm_params:
       model: azure/chatgpt-v-2
diff --git a/tests/test_end_users.py b/tests/test_end_users.py
index 3f1568f96..f49213044 100644
--- a/tests/test_end_users.py
+++ b/tests/test_end_users.py
@@ -153,7 +153,9 @@ async def test_end_user_specific_region():
         )
 
         ## MAKE CALL ##
-        key_gen = await generate_key(session=session, i=0, models=["gpt-3.5-turbo"])
+        key_gen = await generate_key(
+            session=session, i=0, models=["gpt-3.5-turbo-end-user-test"]
+        )
 
         key = key_gen["key"]
 
@@ -162,7 +164,7 @@
         print("SENDING USER PARAM - {}".format(end_user_obj["user_id"]))
 
         result = await client.chat.completions.with_raw_response.create(
-            model="gpt-3.5-turbo",
+            model="gpt-3.5-turbo-end-user-test",
            messages=[{"role": "user", "content": "Hey!"}],
             user=end_user_obj["user_id"],
         )
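Note on the litellm/utils.py change: the module previously built both tokenizers at import time, so every `import litellm` paid for Tokenizer.from_str plus a Hugging Face Hub fetch in Tokenizer.from_pretrained, even when neither tokenizer was used. Moving the construction inside _select_tokenizer, which is already wrapped in @lru_cache(maxsize=128), defers the work to first use and still performs it at most once per model string. A minimal sketch of that lazy-initialization pattern, with a hypothetical build_tokenizer standing in for the real Tokenizer calls:

    from functools import lru_cache

    @lru_cache(maxsize=128)
    def build_tokenizer(model: str) -> str:
        # Stand-in for Tokenizer.from_pretrained(...) / Tokenizer.from_str(...);
        # this body runs only on the first call for each distinct model string.
        print("building tokenizer for", model)
        return "tokenizer<{}>".format(model)

    build_tokenizer("command-r")  # builds (prints once)
    build_tokenizer("command-r")  # served from the cache; nothing printed

The proxy_server_config.yaml and tests/test_end_users.py hunks rename the test model to gpt-3.5-turbo-end-user-test and register a second deployment under the same name, so the end-user region test has to select the EU deployment from more than one candidate instead of hitting a single fixed model.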