LiteLLM Minor Fixes & Improvements (09/19/2024) (#5793)

* fix(model_prices_and_context_window.json): add cost tracking for more vertex llama3.1 models

8b and 70b models

* fix(proxy/utils.py): handle data being None on pre-call hooks

* fix(proxy/): create views on initial proxy startup

Fixes the base case where a user starts the proxy for the first time

 Fixes https://github.com/BerriAI/litellm/issues/5756

* build(config.yml): fix vertex version for test

* feat(ui/): support enabling/disabling slack alerting

Allows admin to turn on/off slack alerting through ui

* feat(rerank/main.py): support langfuse logging

* fix(proxy/utils.py): fix linting errors

* fix(langfuse.py): log clean metadata

* test(tests): replace deprecated openai model
This commit is contained in:
Krish Dholakia 2024-09-20 08:19:52 -07:00 committed by GitHub
parent 7c241ddfcb
commit 4445bfb9d7
22 changed files with 645 additions and 94 deletions

View file

@ -14,7 +14,7 @@ from datetime import datetime, timedelta
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from functools import wraps
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union, overload
import backoff
import httpx
@ -51,6 +51,10 @@ from litellm.proxy._types import (
SpendLogsPayload,
UserAPIKeyAuth,
)
from litellm.proxy.db.create_views import (
create_missing_views,
should_create_missing_views,
)
from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
from litellm.proxy.hooks.parallel_request_limiter import (
@ -365,6 +369,25 @@ class ProxyLogging:
return data
# Typing overloads for pre_call_hook come first; the actual implementation follows them
@overload
async def pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
data: None,
call_type: Literal[
"completion",
"text_completion",
"embeddings",
"image_generation",
"moderation",
"audio_transcription",
"pass_through_endpoint",
"rerank",
],
) -> None:
pass
@overload
async def pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
@ -380,6 +403,23 @@ class ProxyLogging:
"rerank",
],
) -> dict:
pass
async def pre_call_hook(
self,
user_api_key_dict: UserAPIKeyAuth,
data: Optional[dict],
call_type: Literal[
"completion",
"text_completion",
"embeddings",
"image_generation",
"moderation",
"audio_transcription",
"pass_through_endpoint",
"rerank",
],
) -> Optional[dict]:
"""
Allows users to modify/reject the incoming request to the proxy, without having to deal with parsing Request body.
@ -394,6 +434,9 @@ class ProxyLogging:
self.slack_alerting_instance.response_taking_too_long(request_data=data)
)
if data is None:
return None
try:
for callback in litellm.callbacks:
_callback = None
@ -418,7 +461,7 @@ class ProxyLogging:
response = await _callback.async_pre_call_hook(
user_api_key_dict=user_api_key_dict,
cache=self.call_details["user_api_key_cache"],
data=data,
data=data, # type: ignore
call_type=call_type,
)
if response is not None:
@ -434,7 +477,7 @@ class ProxyLogging:
response = await _callback.async_pre_call_hook(
user_api_key_dict=user_api_key_dict,
cache=self.call_details["user_api_key_cache"],
data=data,
data=data, # type: ignore
call_type=call_type,
)
if response is not None:
@ -1021,20 +1064,24 @@ class PrismaClient:
"LiteLLM_VerificationTokenView Created in DB!"
)
else:
# don't block execution if these views are missing
# Convert lists to sets for efficient difference calculation
ret_view_names_set = (
set(ret[0]["view_names"]) if ret[0]["view_names"] else set()
)
expected_views_set = set(expected_views)
# Find missing views
missing_views = expected_views_set - ret_view_names_set
verbose_proxy_logger.warning(
"\n\n\033[93mNot all views exist in db, needed for UI 'Usage' tab. Missing={}.\nRun 'create_views.py' from https://github.com/BerriAI/litellm/tree/main/db_scripts to create missing views.\033[0m\n".format(
missing_views
should_create_views = await should_create_missing_views(db=self.db)
if should_create_views:
await create_missing_views(db=self.db)
else:
# don't block execution if these views are missing
# Convert lists to sets for efficient difference calculation
ret_view_names_set = (
set(ret[0]["view_names"]) if ret[0]["view_names"] else set()
)
expected_views_set = set(expected_views)
# Find missing views
missing_views = expected_views_set - ret_view_names_set
verbose_proxy_logger.warning(
"\n\n\033[93mNot all views exist in db, needed for UI 'Usage' tab. Missing={}.\nRun 'create_views.py' from https://github.com/BerriAI/litellm/tree/main/db_scripts to create missing views.\033[0m\n".format(
missing_views
)
)
)
except Exception as e:
raise