[Feat] Improve OTEL Tracking - Require all Redis Cache reads to be logged on OTEL (#5881)

* fix use previous internal usage caching logic * fix test_dual_cache_uses_redis * redis track event_metadata in service logging * show otel error on _get_parent_otel_span_from_kwargs * track parent otel span on internal usage cache * update_request_status * fix internal usage cache * fix linting * fix test internal usage cache * fix linting error * show event metadata in redis set * fix test_get_team_redis * fix test_get_team_redis * test_proxy_logging_setup
2025-04-25 10:44:24 +00:00 · 2024-09-25 10:57:08 -07:00 · 2024-09-25 10:57:08 -07:00 · 7cbcf538c6
commit 7cbcf538c6
parent 4ec4d02474
9 changed files with 243 additions and 79 deletions
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@ -205,6 +205,83 @@ def log_to_opentelemetry(func):
    return wrapper


+class InternalUsageCache:
+    def __init__(self, dual_cache: DualCache):
+        self.dual_cache: DualCache = dual_cache
+
+    async def async_get_cache(
+        self,
+        key,
+        litellm_parent_otel_span: Union[Span, None],
+        local_only: bool = False,
+        **kwargs,
+    ) -> Any:
+        return await self.dual_cache.async_get_cache(
+            key=key,
+            local_only=local_only,
+            litellm_parent_otel_span=litellm_parent_otel_span,
+            **kwargs,
+        )
+
+    async def async_set_cache(
+        self,
+        key,
+        value,
+        litellm_parent_otel_span: Union[Span, None],
+        local_only: bool = False,
+        **kwargs,
+    ) -> None:
+        return await self.dual_cache.async_set_cache(
+            key=key,
+            value=value,
+            local_only=local_only,
+            litellm_parent_otel_span=litellm_parent_otel_span,
+            **kwargs,
+        )
+
+    async def async_increment_cache(
+        self,
+        key,
+        value: float,
+        litellm_parent_otel_span: Union[Span, None],
+        local_only: bool = False,
+        **kwargs,
+    ):
+        return await self.dual_cache.async_increment_cache(
+            key=key,
+            value=value,
+            local_only=local_only,
+            litellm_parent_otel_span=litellm_parent_otel_span,
+            **kwargs,
+        )
+
+    def set_cache(
+        self,
+        key,
+        value,
+        local_only: bool = False,
+        **kwargs,
+    ) -> None:
+        return self.dual_cache.set_cache(
+            key=key,
+            value=value,
+            local_only=local_only,
+            **kwargs,
+        )
+
+    def get_cache(
+        self,
+        key,
+        local_only: bool = False,
+        **kwargs,
+    ) -> Any:
+        return self.dual_cache.get_cache(
+            key=key,
+            local_only=local_only,
+            **kwargs,
+        )
+
+
 ### LOGGING ###
 class ProxyLogging:
    """
@ -222,9 +299,9 @@ class ProxyLogging:
        ## INITIALIZE  LITELLM CALLBACKS ##
        self.call_details: dict = {}
        self.call_details["user_api_key_cache"] = user_api_key_cache
-        self.internal_usage_cache = DualCache(
-            default_in_memory_ttl=1
-        )  # ping redis cache every 1s
+        self.internal_usage_cache: InternalUsageCache = InternalUsageCache(
+            dual_cache=DualCache(default_in_memory_ttl=1)  # ping redis cache every 1s
+        )
        self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler(
            self.internal_usage_cache
        )
@ -238,7 +315,7 @@ class ProxyLogging:
            alerting_threshold=self.alerting_threshold,
            alerting=self.alerting,
            alert_types=self.alert_types,
-            internal_usage_cache=self.internal_usage_cache,
+            internal_usage_cache=self.internal_usage_cache.dual_cache,
        )

    def update_values(
@ -283,7 +360,7 @@ class ProxyLogging:
                litellm.callbacks.append(self.slack_alerting_instance)  # type: ignore

        if redis_cache is not None:
-            self.internal_usage_cache.redis_cache = redis_cache
+            self.internal_usage_cache.dual_cache.redis_cache = redis_cache

    def _init_litellm_callbacks(self, llm_router: Optional[litellm.Router] = None):
        self.service_logging_obj = ServiceLogging()
@ -298,7 +375,7 @@ class ProxyLogging:
            if isinstance(callback, str):
                callback = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(  # type: ignore
                    callback,
-                    internal_usage_cache=self.internal_usage_cache,
+                    internal_usage_cache=self.internal_usage_cache.dual_cache,
                    llm_router=llm_router,
                )
            if callback not in litellm.input_callback:
@ -347,6 +424,7 @@ class ProxyLogging:
            value=status,
            local_only=True,
            ttl=alerting_threshold,
+            litellm_parent_otel_span=None,
        )

    async def process_pre_call_hook_response(self, response, data, call_type):