Merge pull request #5047 from BerriAI/litellm_log_request_response_gcs

[Feat-Proxy] Log request/response on GCS
2024-08-05 09:05:56 -07:00 · 2024-08-05 09:05:56 -07:00 · 0214ff5fe0
commit 0214ff5fe0
parent f610fba58f 9314341b9e
6 changed files with 153 additions and 105 deletions
--- a/docs/my-website/docs/observability/gcs_bucket_integration.md
+++ b/docs/my-website/docs/observability/gcs_bucket_integration.md
@ -66,31 +66,47 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 Example payload of a `/chat/completions` request logged on GCS
 ```json
 {
-    "request_id": "chatcmpl-3946ddc2-bcfe-43f6-9b8e-2427951de85c",
+  "request_kwargs": {
    "call_type": "acompletion",
    "api_key": "",
    "cache_hit": "None",
    "startTime": "2024-08-01T14:27:12.563246",
    "endTime": "2024-08-01T14:27:12.572709",
    "completionStartTime": "2024-08-01T14:27:12.572709",
    "model": "gpt-3.5-turbo",
-    "user": "",
+    "messages": [
-    "team_id": "",
+      {
-    "metadata": "{}",
+        "role": "user",
-    "cache_key": "Cache OFF",
+        "content": "This is a test"
-    "spend": 0.000054999999999999995,
+      }
-    "total_tokens": 30,
+    ],
-    "prompt_tokens": 10,
+    "optional_params": {
-    "completion_tokens": 20,
+      "temperature": 0.7,
-    "request_tags": "[]",
+      "max_tokens": 10,
-    "end_user": "ishaan-2",
+      "user": "ishaan-2",
-    "api_base": "",
+      "extra_body": {}
-    "model_group": "",
+    }
-    "model_id": "",
+  },
-    "requester_ip_address": null,
+  "response_obj": {
-    "output": [
+    "id": "chatcmpl-bd836a8c-89bc-4abd-bee5-e3f1ebfdb541",
-        "{\"finish_reason\":\"stop\",\"index\":0,\"message\":{\"content\":\"Hi!\",\"role\":\"assistant\",\"tool_calls\":null,\"function_call\":null}}"
+    "choices": [
-    ]
+      {
        "finish_reason": "stop",
        "index": 0,
        "message": {
          "content": "Hi!",
          "role": "assistant",
          "tool_calls": null,
          "function_call": null
        }
      }
    ],
    "created": 1722868456,
    "model": "gpt-3.5-turbo",
    "object": "chat.completion",
    "system_fingerprint": null,
    "usage": {
      "prompt_tokens": 10,
      "completion_tokens": 20,
      "total_tokens": 30
    }
  },
  "start_time": "2024-08-05 07:34:16",
  "end_time": "2024-08-05 07:34:16"
 }
 ```
--- a/docs/my-website/docs/proxy/bucket.md
+++ b/docs/my-website/docs/proxy/bucket.md
@ -76,31 +76,47 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 Example payload of a `/chat/completions` request logged on GCS
 ```json
 {
-    "request_id": "chatcmpl-3946ddc2-bcfe-43f6-9b8e-2427951de85c",
+  "request_kwargs": {
    "call_type": "acompletion",
    "api_key": "",
    "cache_hit": "None",
    "startTime": "2024-08-01T14:27:12.563246",
    "endTime": "2024-08-01T14:27:12.572709",
    "completionStartTime": "2024-08-01T14:27:12.572709",
    "model": "gpt-3.5-turbo",
-    "user": "",
+    "messages": [
-    "team_id": "",
+      {
-    "metadata": "{}",
+        "role": "user",
-    "cache_key": "Cache OFF",
+        "content": "This is a test"
-    "spend": 0.000054999999999999995,
+      }
-    "total_tokens": 30,
+    ],
-    "prompt_tokens": 10,
+    "optional_params": {
-    "completion_tokens": 20,
+      "temperature": 0.7,
-    "request_tags": "[]",
+      "max_tokens": 10,
-    "end_user": "ishaan-2",
+      "user": "ishaan-2",
-    "api_base": "",
+      "extra_body": {}
-    "model_group": "",
+    }
-    "model_id": "",
+  },
-    "requester_ip_address": null,
+  "response_obj": {
-    "output": [
+    "id": "chatcmpl-bd836a8c-89bc-4abd-bee5-e3f1ebfdb541",
-        "{\"finish_reason\":\"stop\",\"index\":0,\"message\":{\"content\":\"Hi!\",\"role\":\"assistant\",\"tool_calls\":null,\"function_call\":null}}"
+    "choices": [
-    ]
+      {
        "finish_reason": "stop",
        "index": 0,
        "message": {
          "content": "Hi!",
          "role": "assistant",
          "tool_calls": null,
          "function_call": null
        }
      }
    ],
    "created": 1722868456,
    "model": "gpt-3.5-turbo",
    "object": "chat.completion",
    "system_fingerprint": null,
    "usage": {
      "prompt_tokens": 10,
      "completion_tokens": 20,
      "total_tokens": 30
    }
  },
  "start_time": "2024-08-05 07:34:16",
  "end_time": "2024-08-05 07:34:16"
 }
 ```
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@ -817,9 +817,19 @@ from .utils import (
    ModelResponse,
    EmbeddingResponse,
    ImageResponse,
    TranscriptionResponse,
    TextCompletionResponse,
    get_provider_fields,
 )
 # Response classes that logging helpers test with isinstance() before calling
 # .model_dump() on a response object (see convert_litellm_response_object_to_dict).
 ALL_LITELLM_RESPONSE_TYPES = [
    ModelResponse,
    EmbeddingResponse,
    ImageResponse,
    TranscriptionResponse,
    TextCompletionResponse,
 ]
 from .types.utils import ImageObject
 from .llms.custom_llm import CustomLLM
 from .llms.huggingface_restapi import HuggingfaceConfig
--- a/litellm/integrations/gcs_bucket.py
+++ b/litellm/integrations/gcs_bucket.py
@ -1,7 +1,7 @@
 import json
 import os
 from datetime import datetime
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, TypedDict, Union
 import httpx
 from pydantic import BaseModel, Field
@ -9,13 +9,24 @@ from pydantic import BaseModel, Field
 import litellm
 from litellm._logging import verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.logging_utils import (
    convert_litellm_response_object_to_dict,
 )
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 from litellm.proxy._types import CommonProxyErrors, SpendLogsPayload
-class GCSBucketPayload(SpendLogsPayload):
+class RequestKwargs(TypedDict):
    model: Optional[str]
    messages: Optional[List]
-    output: Optional[Union[Dict, str, List]]
+    optional_params: Optional[Dict[str, Any]]
 class GCSBucketPayload(TypedDict):
    """
    Shape of the payload that `get_gcs_payload` builds for one logged call.
    """
    # Selected request fields (model, messages, optional_params) taken from kwargs.
    request_kwargs: Optional[RequestKwargs]
    # The response converted to a plain dict.
    response_obj: Optional[Dict]
    # Call start/end times as strings; the logger formats them with
    # "%Y-%m-%d %H:%M:%S" before building the payload.
    start_time: str
    end_time: str
 class GCSBucketLogger(CustomLogger):
@ -58,12 +69,16 @@ class GCSBucketLogger(CustomLogger):
                kwargs,
                response_obj,
            )
            start_time_str = start_time.strftime("%Y-%m-%d %H:%M:%S")
            end_time_str = end_time.strftime("%Y-%m-%d %H:%M:%S")
            headers = await self.construct_request_headers()
            logging_payload: GCSBucketPayload = await self.get_gcs_payload(
-                kwargs, response_obj, start_time, end_time
+                kwargs, response_obj, start_time_str, end_time_str
            )
-            object_name = logging_payload["request_id"]
+            object_name = response_obj["id"]
            response = await self.async_httpx_client.post(
                headers=headers,
                url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}",
@ -106,60 +121,23 @@ class GCSBucketLogger(CustomLogger):
    async def get_gcs_payload(
        self, kwargs, response_obj, start_time, end_time
    ) -> GCSBucketPayload:
-        from litellm.proxy.spend_tracking.spend_tracking_utils import (
+        request_kwargs = RequestKwargs(
-            get_logging_payload,
+            model=kwargs.get("model", None),
            messages=kwargs.get("messages", None),
            optional_params=kwargs.get("optional_params", None),
        )
-
+        response_dict = {}
-        spend_logs_payload: SpendLogsPayload = get_logging_payload(
+        response_dict = convert_litellm_response_object_to_dict(
-            kwargs=kwargs,
+            response_obj=response_obj
            response_obj=response_obj,
            start_time=start_time,
            end_time=end_time,
            end_user_id=kwargs.get("user"),
        )
        gcs_payload: GCSBucketPayload = GCSBucketPayload(
-            **spend_logs_payload, messages=None, output=None
+            request_kwargs=request_kwargs,
            response_obj=response_dict,
            start_time=start_time,
            end_time=end_time,
        )
        gcs_payload["messages"] = kwargs.get("messages", None)
        gcs_payload["startTime"] = start_time.isoformat()
        gcs_payload["endTime"] = end_time.isoformat()
        if gcs_payload["completionStartTime"] is not None:
            gcs_payload["completionStartTime"] = gcs_payload[  # type: ignore
                "completionStartTime"  # type: ignore
            ].isoformat()
        output = None
        if response_obj is not None and (
            kwargs.get("call_type", None) == "embedding"
            or isinstance(response_obj, litellm.EmbeddingResponse)
        ):
            output = None
        elif response_obj is not None and isinstance(
            response_obj, litellm.ModelResponse
        ):
            output_list = []
            for choice in response_obj.choices:
                output_list.append(choice.json())
            output = output_list
        elif response_obj is not None and isinstance(
            response_obj, litellm.TextCompletionResponse
        ):
            output_list = []
            for choice in response_obj.choices:
                output_list.append(choice.json())
            output = output_list
        elif response_obj is not None and isinstance(
            response_obj, litellm.ImageResponse
        ):
            output = response_obj["data"]
        elif response_obj is not None and isinstance(
            response_obj, litellm.TranscriptionResponse
        ):
            output = response_obj["text"]
        gcs_payload["output"] = output
        return gcs_payload
    async def download_gcs_object(self, object_name):
--- a/litellm/litellm_core_utils/logging_utils.py
+++ b/litellm/litellm_core_utils/logging_utils.py
@ -0,0 +1,22 @@
 from typing import Any
 import litellm
 """
 Helper utils used for logging callbacks
 """
 def convert_litellm_response_object_to_dict(response_obj: Any) -> dict:
    """
    Convert a LiteLLM response object to a dictionary.

    Args:
        response_obj: The value to convert. May be ``None``, a ``dict``, one of
            the classes in ``litellm.ALL_LITELLM_RESPONSE_TYPES``, or any other
            object that ``dict()`` accepts (a mapping or an iterable of pairs).

    Returns:
        An empty dict for ``None``; the same object for a ``dict`` (no copy);
        ``response_obj.model_dump()`` for LiteLLM response types; otherwise
        ``dict(response_obj)``.

    Raises:
        TypeError, ValueError: If the fallback ``dict(response_obj)`` cannot
            build a dictionary from the object.
    """
    # No response to log: return an empty dict instead of failing in dict(None).
    if response_obj is None:
        return {}
    if isinstance(response_obj, dict):
        return response_obj
    # isinstance accepts a tuple of classes, so one check covers every type.
    if isinstance(response_obj, tuple(litellm.ALL_LITELLM_RESPONSE_TYPES)):
        return response_obj.model_dump()
    # Not a dict or a LiteLLM response type: coerce it with dict().
    return dict(response_obj)
--- a/litellm/tests/test_gcs_bucket.py
+++ b/litellm/tests/test_gcs_bucket.py
@ -15,7 +15,7 @@ import pytest
 import litellm
 from litellm import completion
 from litellm._logging import verbose_logger
-from litellm.integrations.gcs_bucket import GCSBucketLogger
+from litellm.integrations.gcs_bucket import GCSBucketLogger, GCSBucketPayload
 verbose_logger.setLevel(logging.DEBUG)
@ -87,9 +87,15 @@ async def test_basic_gcs_logger():
    object_from_gcs = json.loads(object_from_gcs)
    print("object_from_gcs", object_from_gcs)
-    assert object_from_gcs["request_id"] == response.id
+    gcs_payload = GCSBucketPayload(**object_from_gcs)
-    assert object_from_gcs["call_type"] == "acompletion"
+
-    assert object_from_gcs["model"] == "gpt-3.5-turbo"
+    print("gcs_payload", gcs_payload)
    assert gcs_payload["request_kwargs"]["model"] == "gpt-3.5-turbo"
    assert gcs_payload["request_kwargs"]["messages"] == [
        {"role": "user", "content": "This is a test"}
    ]
    assert gcs_payload["response_obj"]["choices"][0]["message"]["content"] == "Hi!"
    # Delete Object from GCS
    print("deleting object from GCS")