(feat) add usage / cost tracking for Anthropic passthrough routes (#6835)

* move _process_response in transformation * fix AnthropicConfig test * add AnthropicConfig * fix anthropic_passthrough_handler * fix get_response_body * fix check for streaming response * use 1 helper to return stream_response on passthrough
2025-04-26 11:14:04 +00:00 · 2024-11-20 17:25:12 -08:00 · 2024-11-20 17:25:12 -08:00 · c991864d69
commit c991864d69
parent 2ee4fbb0a5
3 changed files with 142 additions and 30 deletions
--- a/litellm/proxy/pass_through_endpoints/success_handler.py
+++ b/litellm/proxy/pass_through_endpoints/success_handler.py
@ -2,12 +2,17 @@ import json
 import re
 import threading
 from datetime import datetime
-from typing import Union
+from typing import Optional, Union

 import httpx

 import litellm
+from litellm._logging import verbose_proxy_logger
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.litellm_logging import (
+    get_standard_logging_object_payload,
+)
+from litellm.llms.anthropic.chat.transformation import AnthropicConfig
 from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import (
    VertexLLM,
 )
@ -23,9 +28,13 @@ class PassThroughEndpointLogging:
            "predict",
        ]

+        # Anthropic
+        self.TRACKED_ANTHROPIC_ROUTES = ["/messages"]
+
    async def pass_through_async_success_handler(
        self,
        httpx_response: httpx.Response,
+        response_body: Optional[dict],
        logging_obj: LiteLLMLoggingObj,
        url_route: str,
        result: str,
@ -45,6 +54,18 @@ class PassThroughEndpointLogging:
                cache_hit=cache_hit,
                **kwargs,
            )
+        elif self.is_anthropic_route(url_route):
+            await self.anthropic_passthrough_handler(
+                httpx_response=httpx_response,
+                response_body=response_body or {},
+                logging_obj=logging_obj,
+                url_route=url_route,
+                result=result,
+                start_time=start_time,
+                end_time=end_time,
+                cache_hit=cache_hit,
+                **kwargs,
+            )
        else:
            standard_logging_response_object = StandardPassThroughResponseObject(
                response=httpx_response.text
@ -76,6 +97,12 @@ class PassThroughEndpointLogging:
                return True
        return False

+    def is_anthropic_route(self, url_route: str):
+        """
+        Return True when `url_route` contains any entry of `self.TRACKED_ANTHROPIC_ROUTES`, else False.
+
+        The check is a plain substring match, so a tracked fragment matches wherever it appears in the route.
+        """
+        for route in self.TRACKED_ANTHROPIC_ROUTES:
+            if route in url_route:
+                return True
+        return False
+
    def extract_model_from_url(self, url: str) -> str:
        pattern = r"/models/([^:]+)"
        match = re.search(pattern, url)
@ -83,6 +110,72 @@ class PassThroughEndpointLogging:
            return match.group(1)
        return "unknown"

+    async def anthropic_passthrough_handler(
+        self,
+        httpx_response: httpx.Response,
+        response_body: dict,
+        logging_obj: LiteLLMLoggingObj,
+        url_route: str,
+        result: str,
+        start_time: datetime,
+        end_time: datetime,
+        cache_hit: bool,
+        **kwargs,
+    ):
+        """
+        Converts an Anthropic passthrough response into a `litellm.ModelResponse`, computes its cost,
+        builds a standard logging object, and forwards everything to `logging_obj.async_success_handler`.
+
+        - `httpx_response`: raw upstream response, handed to `AnthropicConfig._process_response`.
+        - `response_body`: parsed response body; only its "model" key is read here (defaults to "").
+        - `start_time` / `end_time` / `cache_hit`: passed through to the logging calls.
+        - `url_route` and `result` are accepted but not used in this method body.
+        - `**kwargs`: mutated in place with "response_cost", "model" and "standard_logging_object"
+          before being forwarded to the success handler.
+        """
+        model = response_body.get("model", "")
+        # Non-streaming conversion: messages, optional params, api key and request data are passed as empty placeholders.
+        litellm_model_response: litellm.ModelResponse = (
+            AnthropicConfig._process_response(
+                response=httpx_response,
+                model_response=litellm.ModelResponse(),
+                model=model,
+                stream=False,
+                messages=[],
+                logging_obj=logging_obj,
+                optional_params={},
+                api_key="",
+                data={},
+                print_verbose=litellm.print_verbose,
+                encoding=None,
+                json_mode=False,
+            )
+        )
+
+        # Cost is computed from the converted response and the model name taken from the response body.
+        response_cost = litellm.completion_cost(
+            completion_response=litellm_model_response,
+            model=model,
+        )
+        kwargs["response_cost"] = response_cost
+        kwargs["model"] = model
+
+        # Make standard logging object for Anthropic
+        standard_logging_object = get_standard_logging_object_payload(
+            kwargs=kwargs,
+            init_response_obj=litellm_model_response,
+            start_time=start_time,
+            end_time=end_time,
+            logging_obj=logging_obj,
+            status="success",
+        )
+
+        # pretty print standard logging object
+        # NOTE(review): json.dumps runs eagerly here regardless of log level, and raises TypeError if the
+        # payload holds a non-JSON-serializable value - confirm the payload is always serializable.
+        verbose_proxy_logger.debug(
+            "standard_logging_object= %s", json.dumps(standard_logging_object, indent=4)
+        )
+        kwargs["standard_logging_object"] = standard_logging_object
+
+        # The converted ModelResponse (not the `result` argument) is what gets passed on as `result`.
+        await logging_obj.async_success_handler(
+            result=litellm_model_response,
+            start_time=start_time,
+            end_time=end_time,
+            cache_hit=cache_hit,
+            **kwargs,
+        )
+
+        # Trailing no-op; the method implicitly returns None.
+        pass
+
    async def vertex_passthrough_handler(
        self,
        httpx_response: httpx.Response,