Merge pull request #5047 from BerriAI/litellm_log_request_response_gcs

[Feat-Proxy] Log request/response on GCS
This commit is contained in:
Ishaan Jaff 2024-08-05 09:05:56 -07:00 committed by GitHub
commit 0214ff5fe0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 153 additions and 105 deletions

View file

@ -66,31 +66,47 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
Example payload of a `/chat/completion` request logged on GCS
```json
{
"request_id": "chatcmpl-3946ddc2-bcfe-43f6-9b8e-2427951de85c",
"call_type": "acompletion",
"api_key": "",
"cache_hit": "None",
"startTime": "2024-08-01T14:27:12.563246",
"endTime": "2024-08-01T14:27:12.572709",
"completionStartTime": "2024-08-01T14:27:12.572709",
"request_kwargs": {
"model": "gpt-3.5-turbo",
"user": "",
"team_id": "",
"metadata": "{}",
"cache_key": "Cache OFF",
"spend": 0.000054999999999999995,
"total_tokens": 30,
"messages": [
{
"role": "user",
"content": "This is a test"
}
],
"optional_params": {
"temperature": 0.7,
"max_tokens": 10,
"user": "ishaan-2",
"extra_body": {}
}
},
"response_obj": {
"id": "chatcmpl-bd836a8c-89bc-4abd-bee5-e3f1ebfdb541",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "Hi!",
"role": "assistant",
"tool_calls": null,
"function_call": null
}
}
],
"created": 1722868456,
"model": "gpt-3.5-turbo",
"object": "chat.completion",
"system_fingerprint": null,
"usage": {
"prompt_tokens": 10,
"completion_tokens": 20,
"request_tags": "[]",
"end_user": "ishaan-2",
"api_base": "",
"model_group": "",
"model_id": "",
"requester_ip_address": null,
"output": [
"{\"finish_reason\":\"stop\",\"index\":0,\"message\":{\"content\":\"Hi!\",\"role\":\"assistant\",\"tool_calls\":null,\"function_call\":null}}"
],
"total_tokens": 30
}
},
"start_time": "2024-08-05 07:34:16",
"end_time": "2024-08-05 07:34:16"
}
```

View file

@ -76,31 +76,47 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
Example payload of a `/chat/completion` request logged on GCS
```json
{
"request_id": "chatcmpl-3946ddc2-bcfe-43f6-9b8e-2427951de85c",
"call_type": "acompletion",
"api_key": "",
"cache_hit": "None",
"startTime": "2024-08-01T14:27:12.563246",
"endTime": "2024-08-01T14:27:12.572709",
"completionStartTime": "2024-08-01T14:27:12.572709",
"request_kwargs": {
"model": "gpt-3.5-turbo",
"user": "",
"team_id": "",
"metadata": "{}",
"cache_key": "Cache OFF",
"spend": 0.000054999999999999995,
"total_tokens": 30,
"messages": [
{
"role": "user",
"content": "This is a test"
}
],
"optional_params": {
"temperature": 0.7,
"max_tokens": 10,
"user": "ishaan-2",
"extra_body": {}
}
},
"response_obj": {
"id": "chatcmpl-bd836a8c-89bc-4abd-bee5-e3f1ebfdb541",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "Hi!",
"role": "assistant",
"tool_calls": null,
"function_call": null
}
}
],
"created": 1722868456,
"model": "gpt-3.5-turbo",
"object": "chat.completion",
"system_fingerprint": null,
"usage": {
"prompt_tokens": 10,
"completion_tokens": 20,
"request_tags": "[]",
"end_user": "ishaan-2",
"api_base": "",
"model_group": "",
"model_id": "",
"requester_ip_address": null,
"output": [
"{\"finish_reason\":\"stop\",\"index\":0,\"message\":{\"content\":\"Hi!\",\"role\":\"assistant\",\"tool_calls\":null,\"function_call\":null}}"
],
"total_tokens": 30
}
},
"start_time": "2024-08-05 07:34:16",
"end_time": "2024-08-05 07:34:16"
}
```

View file

@ -817,9 +817,19 @@ from .utils import (
ModelResponse,
EmbeddingResponse,
ImageResponse,
TranscriptionResponse,
TextCompletionResponse,
get_provider_fields,
)
# Every LiteLLM response model in one place, so logging/serialization
# helpers (e.g. convert_litellm_response_object_to_dict) can detect
# "is this one of ours?" with a single membership/isinstance check
# instead of hard-coding each type at every call site.
ALL_LITELLM_RESPONSE_TYPES = [
    ModelResponse,
    EmbeddingResponse,
    ImageResponse,
    TranscriptionResponse,
    TextCompletionResponse,
]
from .types.utils import ImageObject
from .llms.custom_llm import CustomLLM
from .llms.huggingface_restapi import HuggingfaceConfig

View file

@ -1,7 +1,7 @@
import json
import os
from datetime import datetime
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, TypedDict, Union
import httpx
from pydantic import BaseModel, Field
@ -9,13 +9,24 @@ from pydantic import BaseModel, Field
import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.logging_utils import (
convert_litellm_response_object_to_dict,
)
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.proxy._types import CommonProxyErrors, SpendLogsPayload
class GCSBucketPayload(SpendLogsPayload):
class RequestKwargs(TypedDict):
    """Subset of the incoming request kwargs captured in the GCS log payload."""

    # Model name as passed by the caller, e.g. "gpt-3.5-turbo".
    model: Optional[str]
    # Chat messages sent with the request.
    messages: Optional[List]
    # NOTE(review): `output` reads like response-side data, not request
    # kwargs — confirm against the code that populates this TypedDict.
    output: Optional[Union[Dict, str, List]]
    # Provider optional params (temperature, max_tokens, extra_body, ...).
    optional_params: Optional[Dict[str, Any]]
class GCSBucketPayload(TypedDict):
    """Top-level JSON object uploaded to the GCS bucket for one request."""

    # Request-side fields (model, messages, optional_params).
    request_kwargs: Optional[RequestKwargs]
    # Full response serialized to a plain dict.
    response_obj: Optional[Dict]
    # Pre-formatted timestamp strings — presumably "%Y-%m-%d %H:%M:%S"
    # per the caller's strftime; verify against async_log_success_event.
    start_time: str
    end_time: str
class GCSBucketLogger(CustomLogger):
@ -58,12 +69,16 @@ class GCSBucketLogger(CustomLogger):
kwargs,
response_obj,
)
start_time_str = start_time.strftime("%Y-%m-%d %H:%M:%S")
end_time_str = end_time.strftime("%Y-%m-%d %H:%M:%S")
headers = await self.construct_request_headers()
logging_payload: GCSBucketPayload = await self.get_gcs_payload(
kwargs, response_obj, start_time, end_time
kwargs, response_obj, start_time_str, end_time_str
)
object_name = logging_payload["request_id"]
object_name = response_obj["id"]
response = await self.async_httpx_client.post(
headers=headers,
url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}",
@ -106,60 +121,23 @@ class GCSBucketLogger(CustomLogger):
async def get_gcs_payload(
self, kwargs, response_obj, start_time, end_time
) -> GCSBucketPayload:
from litellm.proxy.spend_tracking.spend_tracking_utils import (
get_logging_payload,
request_kwargs = RequestKwargs(
model=kwargs.get("model", None),
messages=kwargs.get("messages", None),
optional_params=kwargs.get("optional_params", None),
)
spend_logs_payload: SpendLogsPayload = get_logging_payload(
kwargs=kwargs,
response_obj=response_obj,
start_time=start_time,
end_time=end_time,
end_user_id=kwargs.get("user"),
response_dict = {}
response_dict = convert_litellm_response_object_to_dict(
response_obj=response_obj
)
gcs_payload: GCSBucketPayload = GCSBucketPayload(
**spend_logs_payload, messages=None, output=None
request_kwargs=request_kwargs,
response_obj=response_dict,
start_time=start_time,
end_time=end_time,
)
gcs_payload["messages"] = kwargs.get("messages", None)
gcs_payload["startTime"] = start_time.isoformat()
gcs_payload["endTime"] = end_time.isoformat()
if gcs_payload["completionStartTime"] is not None:
gcs_payload["completionStartTime"] = gcs_payload[ # type: ignore
"completionStartTime" # type: ignore
].isoformat()
output = None
if response_obj is not None and (
kwargs.get("call_type", None) == "embedding"
or isinstance(response_obj, litellm.EmbeddingResponse)
):
output = None
elif response_obj is not None and isinstance(
response_obj, litellm.ModelResponse
):
output_list = []
for choice in response_obj.choices:
output_list.append(choice.json())
output = output_list
elif response_obj is not None and isinstance(
response_obj, litellm.TextCompletionResponse
):
output_list = []
for choice in response_obj.choices:
output_list.append(choice.json())
output = output_list
elif response_obj is not None and isinstance(
response_obj, litellm.ImageResponse
):
output = response_obj["data"]
elif response_obj is not None and isinstance(
response_obj, litellm.TranscriptionResponse
):
output = response_obj["text"]
gcs_payload["output"] = output
return gcs_payload
async def download_gcs_object(self, object_name):

View file

@ -0,0 +1,22 @@
from typing import Any
import litellm
"""
Helper utils used for logging callbacks
"""
def convert_litellm_response_object_to_dict(response_obj: Any) -> dict:
    """
    Return ``response_obj`` as a plain ``dict``.

    Plain dicts pass through unchanged; known LiteLLM response models are
    serialized via ``model_dump()``; any other object is coerced with the
    ``dict()`` constructor (which raises ``TypeError`` if incompatible).
    """
    if isinstance(response_obj, dict):
        # Already a plain mapping — nothing to convert.
        return response_obj

    litellm_types = tuple(litellm.ALL_LITELLM_RESPONSE_TYPES)
    if isinstance(response_obj, litellm_types):
        # Pydantic model → dict via its own serializer.
        return response_obj.model_dump()

    # Unknown type: attempt a generic dict() coercion.
    return dict(response_obj)

View file

@ -15,7 +15,7 @@ import pytest
import litellm
from litellm import completion
from litellm._logging import verbose_logger
from litellm.integrations.gcs_bucket import GCSBucketLogger
from litellm.integrations.gcs_bucket import GCSBucketLogger, GCSBucketPayload
verbose_logger.setLevel(logging.DEBUG)
@ -87,9 +87,15 @@ async def test_basic_gcs_logger():
object_from_gcs = json.loads(object_from_gcs)
print("object_from_gcs", object_from_gcs)
assert object_from_gcs["request_id"] == response.id
assert object_from_gcs["call_type"] == "acompletion"
assert object_from_gcs["model"] == "gpt-3.5-turbo"
gcs_payload = GCSBucketPayload(**object_from_gcs)
print("gcs_payload", gcs_payload)
assert gcs_payload["request_kwargs"]["model"] == "gpt-3.5-turbo"
assert gcs_payload["request_kwargs"]["messages"] == [
{"role": "user", "content": "This is a test"}
]
assert gcs_payload["response_obj"]["choices"][0]["message"]["content"] == "Hi!"
# Delete Object from GCS
print("deleting object from GCS")