(feat) log error class, function_name on prometheus service failure hook + only log DB related failures on DB service hook (#6650)

* log error on prometheus service failure hook * use a more accurate function name for wrapper that handles logging db metrics * fix log_db_metrics * test_log_db_metrics_failure_error_types * fix linting * fix auth checks
2025-04-25 10:44:24 +00:00 · 2024-11-07 17:01:18 -08:00 · 2024-11-07 17:01:18 -08:00 · eb47117800
commit eb47117800
parent ae385cfcdc
8 changed files with 249 additions and 110 deletions
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@ -55,10 +55,6 @@ from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
 from litellm.integrations.SlackAlerting.utils import _add_langfuse_trace_id_to_alert
-from litellm.litellm_core_utils.core_helpers import (
-    _get_parent_otel_span_from_kwargs,
-    get_litellm_metadata_from_kwargs,
-)
 from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy._types import (
@ -77,6 +73,7 @@ from litellm.proxy.db.create_views import (
    create_missing_views,
    should_create_missing_views,
 )
+from litellm.proxy.db.log_db_metrics import log_db_metrics
 from litellm.proxy.db.prisma_client import PrismaWrapper
 from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
 from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
@ -137,83 +134,6 @@ def safe_deep_copy(data):
    return new_data


-def log_to_opentelemetry(func):
-    """
-    Decorator to log the duration of a DB related function to ServiceLogger()
-
-    Handles logging DB success/failure to ServiceLogger(), which logs to Prometheus, OTEL, Datadog
-    """
-
-    @wraps(func)
-    async def wrapper(*args, **kwargs):
-        start_time: datetime = datetime.now()
-
-        try:
-            result = await func(*args, **kwargs)
-            end_time: datetime = datetime.now()
-            from litellm.proxy.proxy_server import proxy_logging_obj
-
-            if "PROXY" not in func.__name__:
-                await proxy_logging_obj.service_logging_obj.async_service_success_hook(
-                    service=ServiceTypes.DB,
-                    call_type=func.__name__,
-                    parent_otel_span=kwargs.get("parent_otel_span", None),
-                    duration=(end_time - start_time).total_seconds(),
-                    start_time=start_time,
-                    end_time=end_time,
-                    event_metadata={
-                        "function_name": func.__name__,
-                        "function_kwargs": kwargs,
-                        "function_args": args,
-                    },
-                )
-            elif (
-                # in litellm custom callbacks kwargs is passed as arg[0]
-                # https://docs.litellm.ai/docs/observability/custom_callback#callback-functions
-                args is not None
-                and len(args) > 0
-                and isinstance(args[0], dict)
-            ):
-                passed_kwargs = args[0]
-                parent_otel_span = _get_parent_otel_span_from_kwargs(
-                    kwargs=passed_kwargs
-                )
-                if parent_otel_span is not None:
-                    metadata = get_litellm_metadata_from_kwargs(kwargs=passed_kwargs)
-                    await proxy_logging_obj.service_logging_obj.async_service_success_hook(
-                        service=ServiceTypes.BATCH_WRITE_TO_DB,
-                        call_type=func.__name__,
-                        parent_otel_span=parent_otel_span,
-                        duration=0.0,
-                        start_time=start_time,
-                        end_time=end_time,
-                        event_metadata=metadata,
-                    )
-            # end of logging to otel
-            return result
-        except Exception as e:
-            from litellm.proxy.proxy_server import proxy_logging_obj
-
-            end_time: datetime = datetime.now()
-            await proxy_logging_obj.service_logging_obj.async_service_failure_hook(
-                error=e,
-                service=ServiceTypes.DB,
-                call_type=func.__name__,
-                parent_otel_span=kwargs.get("parent_otel_span"),
-                duration=(end_time - start_time).total_seconds(),
-                start_time=start_time,
-                end_time=end_time,
-                event_metadata={
-                    "function_name": func.__name__,
-                    "function_kwargs": kwargs,
-                    "function_args": args,
-                },
-            )
-            raise e
-
-    return wrapper
-
-
 class InternalUsageCache:
    def __init__(self, dual_cache: DualCache):
        self.dual_cache: DualCache = dual_cache
@ -1397,7 +1317,7 @@ class PrismaClient:

        return

-    @log_to_opentelemetry
+    @log_db_metrics
    @backoff.on_exception(
        backoff.expo,
        Exception,  # base exception to catch for the backoff
@ -1463,7 +1383,7 @@ class PrismaClient:
        max_time=10,  # maximum total time to retry for
        on_backoff=on_backoff,  # specifying the function to call on backoff
    )
-    @log_to_opentelemetry
+    @log_db_metrics
    async def get_data(  # noqa: PLR0915
        self,
        token: Optional[Union[str, list]] = None,