mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
(feat) log error class, function_name on prometheus service failure hook + only log DB related failures on DB service hook (#6650)
* log error on prometheus service failure hook * use a more accurate function name for wrapper that handles logging db metrics * fix log_db_metrics * test_log_db_metrics_failure_error_types * fix linting * fix auth checks
This commit is contained in:
parent
ae385cfcdc
commit
eb47117800
8 changed files with 249 additions and 110 deletions
|
@ -55,10 +55,6 @@ from litellm.integrations.custom_guardrail import CustomGuardrail
|
|||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
|
||||
from litellm.integrations.SlackAlerting.utils import _add_langfuse_trace_id_to_alert
|
||||
from litellm.litellm_core_utils.core_helpers import (
|
||||
_get_parent_otel_span_from_kwargs,
|
||||
get_litellm_metadata_from_kwargs,
|
||||
)
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging
|
||||
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
|
||||
from litellm.proxy._types import (
|
||||
|
@ -77,6 +73,7 @@ from litellm.proxy.db.create_views import (
|
|||
create_missing_views,
|
||||
should_create_missing_views,
|
||||
)
|
||||
from litellm.proxy.db.log_db_metrics import log_db_metrics
|
||||
from litellm.proxy.db.prisma_client import PrismaWrapper
|
||||
from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
|
||||
from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
|
||||
|
@ -137,83 +134,6 @@ def safe_deep_copy(data):
|
|||
return new_data
|
||||
|
||||
|
||||
def log_to_opentelemetry(func):
|
||||
"""
|
||||
Decorator to log the duration of a DB related function to ServiceLogger()
|
||||
|
||||
Handles logging DB success/failure to ServiceLogger(), which logs to Prometheus, OTEL, Datadog
|
||||
"""
|
||||
|
||||
@wraps(func)
|
||||
async def wrapper(*args, **kwargs):
|
||||
start_time: datetime = datetime.now()
|
||||
|
||||
try:
|
||||
result = await func(*args, **kwargs)
|
||||
end_time: datetime = datetime.now()
|
||||
from litellm.proxy.proxy_server import proxy_logging_obj
|
||||
|
||||
if "PROXY" not in func.__name__:
|
||||
await proxy_logging_obj.service_logging_obj.async_service_success_hook(
|
||||
service=ServiceTypes.DB,
|
||||
call_type=func.__name__,
|
||||
parent_otel_span=kwargs.get("parent_otel_span", None),
|
||||
duration=(end_time - start_time).total_seconds(),
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
event_metadata={
|
||||
"function_name": func.__name__,
|
||||
"function_kwargs": kwargs,
|
||||
"function_args": args,
|
||||
},
|
||||
)
|
||||
elif (
|
||||
# in litellm custom callbacks kwargs is passed as arg[0]
|
||||
# https://docs.litellm.ai/docs/observability/custom_callback#callback-functions
|
||||
args is not None
|
||||
and len(args) > 0
|
||||
and isinstance(args[0], dict)
|
||||
):
|
||||
passed_kwargs = args[0]
|
||||
parent_otel_span = _get_parent_otel_span_from_kwargs(
|
||||
kwargs=passed_kwargs
|
||||
)
|
||||
if parent_otel_span is not None:
|
||||
metadata = get_litellm_metadata_from_kwargs(kwargs=passed_kwargs)
|
||||
await proxy_logging_obj.service_logging_obj.async_service_success_hook(
|
||||
service=ServiceTypes.BATCH_WRITE_TO_DB,
|
||||
call_type=func.__name__,
|
||||
parent_otel_span=parent_otel_span,
|
||||
duration=0.0,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
event_metadata=metadata,
|
||||
)
|
||||
# end of logging to otel
|
||||
return result
|
||||
except Exception as e:
|
||||
from litellm.proxy.proxy_server import proxy_logging_obj
|
||||
|
||||
end_time: datetime = datetime.now()
|
||||
await proxy_logging_obj.service_logging_obj.async_service_failure_hook(
|
||||
error=e,
|
||||
service=ServiceTypes.DB,
|
||||
call_type=func.__name__,
|
||||
parent_otel_span=kwargs.get("parent_otel_span"),
|
||||
duration=(end_time - start_time).total_seconds(),
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
event_metadata={
|
||||
"function_name": func.__name__,
|
||||
"function_kwargs": kwargs,
|
||||
"function_args": args,
|
||||
},
|
||||
)
|
||||
raise e
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
class InternalUsageCache:
|
||||
def __init__(self, dual_cache: DualCache):
|
||||
self.dual_cache: DualCache = dual_cache
|
||||
|
@ -1397,7 +1317,7 @@ class PrismaClient:
|
|||
|
||||
return
|
||||
|
||||
@log_to_opentelemetry
|
||||
@log_db_metrics
|
||||
@backoff.on_exception(
|
||||
backoff.expo,
|
||||
Exception, # base exception to catch for the backoff
|
||||
|
@ -1463,7 +1383,7 @@ class PrismaClient:
|
|||
max_time=10, # maximum total time to retry for
|
||||
on_backoff=on_backoff, # specifying the function to call on backoff
|
||||
)
|
||||
@log_to_opentelemetry
|
||||
@log_db_metrics
|
||||
async def get_data( # noqa: PLR0915
|
||||
self,
|
||||
token: Optional[Union[str, list]] = None,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue