mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
LiteLLM Minor Fixes & Improvements (09/19/2024) (#5793)
* fix(model_prices_and_context_window.json): add cost tracking for more vertex llama3.1 model 8b and 70b models
* fix(proxy/utils.py): handle data being none on pre-call hooks
* fix(proxy/): create views on initial proxy startup
  fixes base case, where user starts proxy for first time
  Fixes https://github.com/BerriAI/litellm/issues/5756
* build(config.yml): fix vertex version for test
* feat(ui/): support enabling/disabling slack alerting
  Allows admin to turn on/off slack alerting through ui
* feat(rerank/main.py): support langfuse logging
* fix(proxy/utils.py): fix linting errors
* fix(langfuse.py): log clean metadata
* test(tests): replace deprecated openai model
This commit is contained in:
parent
7c241ddfcb
commit
4445bfb9d7
22 changed files with 645 additions and 94 deletions
|
@ -14,7 +14,7 @@ from datetime import datetime, timedelta
|
|||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
from functools import wraps
|
||||
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union
|
||||
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union, overload
|
||||
|
||||
import backoff
|
||||
import httpx
|
||||
|
@ -51,6 +51,10 @@ from litellm.proxy._types import (
|
|||
SpendLogsPayload,
|
||||
UserAPIKeyAuth,
|
||||
)
|
||||
from litellm.proxy.db.create_views import (
|
||||
create_missing_views,
|
||||
should_create_missing_views,
|
||||
)
|
||||
from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
|
||||
from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
|
||||
from litellm.proxy.hooks.parallel_request_limiter import (
|
||||
|
@ -365,6 +369,25 @@ class ProxyLogging:
|
|||
return data
|
||||
|
||||
# The actual implementation of the function
|
||||
@overload
|
||||
async def pre_call_hook(
|
||||
self,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
data: None,
|
||||
call_type: Literal[
|
||||
"completion",
|
||||
"text_completion",
|
||||
"embeddings",
|
||||
"image_generation",
|
||||
"moderation",
|
||||
"audio_transcription",
|
||||
"pass_through_endpoint",
|
||||
"rerank",
|
||||
],
|
||||
) -> None:
|
||||
pass
|
||||
|
||||
@overload
|
||||
async def pre_call_hook(
|
||||
self,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
|
@ -380,6 +403,23 @@ class ProxyLogging:
|
|||
"rerank",
|
||||
],
|
||||
) -> dict:
|
||||
pass
|
||||
|
||||
async def pre_call_hook(
|
||||
self,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
data: Optional[dict],
|
||||
call_type: Literal[
|
||||
"completion",
|
||||
"text_completion",
|
||||
"embeddings",
|
||||
"image_generation",
|
||||
"moderation",
|
||||
"audio_transcription",
|
||||
"pass_through_endpoint",
|
||||
"rerank",
|
||||
],
|
||||
) -> Optional[dict]:
|
||||
"""
|
||||
Allows users to modify/reject the incoming request to the proxy, without having to deal with parsing Request body.
|
||||
|
||||
|
@ -394,6 +434,9 @@ class ProxyLogging:
|
|||
self.slack_alerting_instance.response_taking_too_long(request_data=data)
|
||||
)
|
||||
|
||||
if data is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
for callback in litellm.callbacks:
|
||||
_callback = None
|
||||
|
@ -418,7 +461,7 @@ class ProxyLogging:
|
|||
response = await _callback.async_pre_call_hook(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
cache=self.call_details["user_api_key_cache"],
|
||||
data=data,
|
||||
data=data, # type: ignore
|
||||
call_type=call_type,
|
||||
)
|
||||
if response is not None:
|
||||
|
@ -434,7 +477,7 @@ class ProxyLogging:
|
|||
response = await _callback.async_pre_call_hook(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
cache=self.call_details["user_api_key_cache"],
|
||||
data=data,
|
||||
data=data, # type: ignore
|
||||
call_type=call_type,
|
||||
)
|
||||
if response is not None:
|
||||
|
@ -1021,20 +1064,24 @@ class PrismaClient:
|
|||
"LiteLLM_VerificationTokenView Created in DB!"
|
||||
)
|
||||
else:
|
||||
# don't block execution if these views are missing
|
||||
# Convert lists to sets for efficient difference calculation
|
||||
ret_view_names_set = (
|
||||
set(ret[0]["view_names"]) if ret[0]["view_names"] else set()
|
||||
)
|
||||
expected_views_set = set(expected_views)
|
||||
# Find missing views
|
||||
missing_views = expected_views_set - ret_view_names_set
|
||||
|
||||
verbose_proxy_logger.warning(
|
||||
"\n\n\033[93mNot all views exist in db, needed for UI 'Usage' tab. Missing={}.\nRun 'create_views.py' from https://github.com/BerriAI/litellm/tree/main/db_scripts to create missing views.\033[0m\n".format(
|
||||
missing_views
|
||||
should_create_views = await should_create_missing_views(db=self.db)
|
||||
if should_create_views:
|
||||
await create_missing_views(db=self.db)
|
||||
else:
|
||||
# don't block execution if these views are missing
|
||||
# Convert lists to sets for efficient difference calculation
|
||||
ret_view_names_set = (
|
||||
set(ret[0]["view_names"]) if ret[0]["view_names"] else set()
|
||||
)
|
||||
expected_views_set = set(expected_views)
|
||||
# Find missing views
|
||||
missing_views = expected_views_set - ret_view_names_set
|
||||
|
||||
verbose_proxy_logger.warning(
|
||||
"\n\n\033[93mNot all views exist in db, needed for UI 'Usage' tab. Missing={}.\nRun 'create_views.py' from https://github.com/BerriAI/litellm/tree/main/db_scripts to create missing views.\033[0m\n".format(
|
||||
missing_views
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue