forked from phoenix/litellm-mirror

Merge pull request #5047 from BerriAI/litellm_log_request_response_gcs

[Feat-Proxy] Log request/response on GCS

Commit 0214ff5fe0: 6 changed files with 153 additions and 105 deletions
@@ -66,31 +66,47 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \

Example payload of a `/chat/completion` request logged on GCS

```json
{
  "request_id": "chatcmpl-3946ddc2-bcfe-43f6-9b8e-2427951de85c",
  "call_type": "acompletion",
  "api_key": "",
  "cache_hit": "None",
  "startTime": "2024-08-01T14:27:12.563246",
  "endTime": "2024-08-01T14:27:12.572709",
  "completionStartTime": "2024-08-01T14:27:12.572709",
  "request_kwargs": {
    "model": "gpt-3.5-turbo",
    "user": "",
    "team_id": "",
    "metadata": "{}",
    "cache_key": "Cache OFF",
    "spend": 0.000054999999999999995,
    "total_tokens": 30,
    "messages": [
      {
        "role": "user",
        "content": "This is a test"
      }
    ],
    "optional_params": {
      "temperature": 0.7,
      "max_tokens": 10,
      "user": "ishaan-2",
      "extra_body": {}
    }
  },
  "response_obj": {
    "id": "chatcmpl-bd836a8c-89bc-4abd-bee5-e3f1ebfdb541",
    "choices": [
      {
        "finish_reason": "stop",
        "index": 0,
        "message": {
          "content": "Hi!",
          "role": "assistant",
          "tool_calls": null,
          "function_call": null
        }
      }
    ],
    "created": 1722868456,
    "model": "gpt-3.5-turbo",
    "object": "chat.completion",
    "system_fingerprint": null,
    "usage": {
      "prompt_tokens": 10,
      "completion_tokens": 20,
      "request_tags": "[]",
      "end_user": "ishaan-2",
      "api_base": "",
      "model_group": "",
      "model_id": "",
      "requester_ip_address": null,
      "output": [
        "{\"finish_reason\":\"stop\",\"index\":0,\"message\":{\"content\":\"Hi!\",\"role\":\"assistant\",\"tool_calls\":null,\"function_call\":null}}"
      ],
      "total_tokens": 30
    }
  },
  "start_time": "2024-08-05 07:34:16",
  "end_time": "2024-08-05 07:34:16"
}
```
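For context, one plausible way to produce a log like the payload above from Python, as a minimal sketch: it registers the `GCSBucketLogger` from this PR as a callback and assumes the bucket name and service-account path come from environment variables. The exact variable names are not shown in this diff and are placeholders here.

```python
import os

import litellm
from litellm.integrations.gcs_bucket import GCSBucketLogger

# Assumed env vars for bucket + credentials (names not confirmed by this
# excerpt; adjust to the integration's actual settings).
os.environ["GCS_BUCKET_NAME"] = "my-litellm-logs"            # hypothetical bucket
os.environ["GCS_PATH_SERVICE_ACCOUNT"] = "/path/to/sa.json"  # hypothetical path

# Register the logger, then make the same call as in the docs example.
litellm.callbacks = [GCSBucketLogger()]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "This is a test"}],
    temperature=0.7,
    max_tokens=10,
    user="ishaan-2",
)
# On success, the logger uploads a GCSBucketPayload JSON object (shaped like
# the example above) to the bucket, named after the response id.
```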
```diff
@@ -817,9 +817,19 @@ from .utils import (
     ModelResponse,
     EmbeddingResponse,
     ImageResponse,
     TranscriptionResponse,
     TextCompletionResponse,
     get_provider_fields,
 )
 
+ALL_LITELLM_RESPONSE_TYPES = [
+    ModelResponse,
+    EmbeddingResponse,
+    ImageResponse,
+    TranscriptionResponse,
+    TextCompletionResponse,
+]
+
 from .types.utils import ImageObject
 from .llms.custom_llm import CustomLLM
 from .llms.huggingface_restapi import HuggingfaceConfig
```
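Since the new `ALL_LITELLM_RESPONSE_TYPES` export enumerates every response class, downstream code can answer "is this any LiteLLM response?" in a single check; a small sketch (`is_litellm_response` is a hypothetical helper, not part of this PR):

```python
import litellm


def is_litellm_response(resp: object) -> bool:
    # isinstance accepts a tuple of types, so the list collapses to one check.
    return isinstance(resp, tuple(litellm.ALL_LITELLM_RESPONSE_TYPES))
```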
```diff
@@ -1,7 +1,7 @@
 import json
 import os
 from datetime import datetime
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, TypedDict, Union
 
 import httpx
 from pydantic import BaseModel, Field
```
```diff
@@ -9,13 +9,24 @@ from pydantic import BaseModel, Field
 import litellm
 from litellm._logging import verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
+from litellm.litellm_core_utils.logging_utils import (
+    convert_litellm_response_object_to_dict,
+)
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 from litellm.proxy._types import CommonProxyErrors, SpendLogsPayload
 
 
-class GCSBucketPayload(SpendLogsPayload):
+class RequestKwargs(TypedDict):
+    model: Optional[str]
+    messages: Optional[List]
+    output: Optional[Union[Dict, str, List]]
+    optional_params: Optional[Dict[str, Any]]
+
+
+class GCSBucketPayload(TypedDict):
+    request_kwargs: Optional[RequestKwargs]
+    response_obj: Optional[Dict]
+    start_time: str
+    end_time: str
 
 
 class GCSBucketLogger(CustomLogger):
```
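To make the new schema concrete, here is a self-contained sketch that builds a payload by hand, with hypothetical values echoing the documented example (at runtime a TypedDict is just a plain dict, so no validation occurs):

```python
from typing import Any, Dict, List, Optional, TypedDict, Union


class RequestKwargs(TypedDict):
    model: Optional[str]
    messages: Optional[List]
    output: Optional[Union[Dict, str, List]]
    optional_params: Optional[Dict[str, Any]]


class GCSBucketPayload(TypedDict):
    request_kwargs: Optional[RequestKwargs]
    response_obj: Optional[Dict]
    start_time: str
    end_time: str


# Hypothetical values mirroring the documented example payload.
payload = GCSBucketPayload(
    request_kwargs=RequestKwargs(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "This is a test"}],
        output=None,
        optional_params={"temperature": 0.7, "max_tokens": 10},
    ),
    response_obj={"id": "chatcmpl-...", "object": "chat.completion"},
    start_time="2024-08-05 07:34:16",
    end_time="2024-08-05 07:34:16",
)
assert payload["request_kwargs"]["model"] == "gpt-3.5-turbo"
```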
```diff
@@ -58,12 +69,16 @@ class GCSBucketLogger(CustomLogger):
                 kwargs,
                 response_obj,
             )
 
+            start_time_str = start_time.strftime("%Y-%m-%d %H:%M:%S")
+            end_time_str = end_time.strftime("%Y-%m-%d %H:%M:%S")
             headers = await self.construct_request_headers()
 
             logging_payload: GCSBucketPayload = await self.get_gcs_payload(
-                kwargs, response_obj, start_time, end_time
+                kwargs, response_obj, start_time_str, end_time_str
             )
 
-            object_name = logging_payload["request_id"]
+            object_name = response_obj["id"]
             response = await self.async_httpx_client.post(
                 headers=headers,
                 url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}",
```
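Two behavior changes in this hunk are easy to miss: timestamps are now pre-formatted strings rather than datetime objects, and the uploaded object is named after the response id instead of the proxy request id. A small sketch of both (the bucket name is hypothetical):

```python
from datetime import datetime

# Timestamps are flattened to "YYYY-MM-DD HH:MM:SS" before the payload is built.
start_time_str = datetime(2024, 8, 5, 7, 34, 16).strftime("%Y-%m-%d %H:%M:%S")
assert start_time_str == "2024-08-05 07:34:16"

# The GCS object is keyed by the response id, matching the upload URL above.
BUCKET_NAME = "my-litellm-logs"  # hypothetical
object_name = "chatcmpl-bd836a8c-89bc-4abd-bee5-e3f1ebfdb541"
url = (
    f"https://storage.googleapis.com/upload/storage/v1/b/{BUCKET_NAME}"
    f"/o?uploadType=media&name={object_name}"
)
```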
```diff
@@ -106,60 +121,23 @@ class GCSBucketLogger(CustomLogger):
     async def get_gcs_payload(
         self, kwargs, response_obj, start_time, end_time
     ) -> GCSBucketPayload:
-        from litellm.proxy.spend_tracking.spend_tracking_utils import (
-            get_logging_payload,
-        )
+        request_kwargs = RequestKwargs(
+            model=kwargs.get("model", None),
+            messages=kwargs.get("messages", None),
+            optional_params=kwargs.get("optional_params", None),
+        )
 
-        spend_logs_payload: SpendLogsPayload = get_logging_payload(
-            kwargs=kwargs,
-            response_obj=response_obj,
-            start_time=start_time,
-            end_time=end_time,
-            end_user_id=kwargs.get("user"),
-        )
+        response_dict = {}
+        response_dict = convert_litellm_response_object_to_dict(
+            response_obj=response_obj
+        )
 
         gcs_payload: GCSBucketPayload = GCSBucketPayload(
-            **spend_logs_payload, messages=None, output=None
+            request_kwargs=request_kwargs,
+            response_obj=response_dict,
+            start_time=start_time,
+            end_time=end_time,
         )
-        gcs_payload["messages"] = kwargs.get("messages", None)
-        gcs_payload["startTime"] = start_time.isoformat()
-        gcs_payload["endTime"] = end_time.isoformat()
-
-        if gcs_payload["completionStartTime"] is not None:
-            gcs_payload["completionStartTime"] = gcs_payload[  # type: ignore
-                "completionStartTime"  # type: ignore
-            ].isoformat()
-
-        output = None
-        if response_obj is not None and (
-            kwargs.get("call_type", None) == "embedding"
-            or isinstance(response_obj, litellm.EmbeddingResponse)
-        ):
-            output = None
-        elif response_obj is not None and isinstance(
-            response_obj, litellm.ModelResponse
-        ):
-            output_list = []
-            for choice in response_obj.choices:
-                output_list.append(choice.json())
-            output = output_list
-        elif response_obj is not None and isinstance(
-            response_obj, litellm.TextCompletionResponse
-        ):
-            output_list = []
-            for choice in response_obj.choices:
-                output_list.append(choice.json())
-            output = output_list
-        elif response_obj is not None and isinstance(
-            response_obj, litellm.ImageResponse
-        ):
-            output = response_obj["data"]
-        elif response_obj is not None and isinstance(
-            response_obj, litellm.TranscriptionResponse
-        ):
-            output = response_obj["text"]
-
-        gcs_payload["output"] = output
         return gcs_payload
 
     async def download_gcs_object(self, object_name):
```
litellm/litellm_core_utils/logging_utils.py (new file, 22 lines)

```diff
@@ -0,0 +1,22 @@
+from typing import Any
+
+import litellm
+
+"""
+Helper utils used for logging callbacks
+"""
+
+
+def convert_litellm_response_object_to_dict(response_obj: Any) -> dict:
+    """
+    Convert a LiteLLM response object to a dictionary
+
+    """
+    if isinstance(response_obj, dict):
+        return response_obj
+    for _type in litellm.ALL_LITELLM_RESPONSE_TYPES:
+        if isinstance(response_obj, _type):
+            return response_obj.model_dump()
+
+    # If it's not a LiteLLM type, return the object as is
+    return dict(response_obj)
```
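A quick sketch of the helper's two paths (this assumes `litellm.ModelResponse()` constructs with defaults, which is how litellm's own tests build bare responses):

```python
import litellm
from litellm.litellm_core_utils.logging_utils import (
    convert_litellm_response_object_to_dict,
)

# A known LiteLLM response type is dumped via pydantic's model_dump() ...
model_response = litellm.ModelResponse()
as_dict = convert_litellm_response_object_to_dict(response_obj=model_response)
assert isinstance(as_dict, dict)

# ... while a plain dict passes through unchanged.
assert convert_litellm_response_object_to_dict(response_obj={"id": "x"}) == {"id": "x"}
```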
```diff
@@ -15,7 +15,7 @@ import pytest
 import litellm
 from litellm import completion
 from litellm._logging import verbose_logger
-from litellm.integrations.gcs_bucket import GCSBucketLogger
+from litellm.integrations.gcs_bucket import GCSBucketLogger, GCSBucketPayload
 
 verbose_logger.setLevel(logging.DEBUG)
```
```diff
@@ -87,9 +87,15 @@ async def test_basic_gcs_logger():
     object_from_gcs = json.loads(object_from_gcs)
     print("object_from_gcs", object_from_gcs)
 
-    assert object_from_gcs["request_id"] == response.id
-    assert object_from_gcs["call_type"] == "acompletion"
-    assert object_from_gcs["model"] == "gpt-3.5-turbo"
+    gcs_payload = GCSBucketPayload(**object_from_gcs)
+
+    print("gcs_payload", gcs_payload)
+
+    assert gcs_payload["request_kwargs"]["model"] == "gpt-3.5-turbo"
+    assert gcs_payload["request_kwargs"]["messages"] == [
+        {"role": "user", "content": "This is a test"}
+    ]
+    assert gcs_payload["response_obj"]["choices"][0]["message"]["content"] == "Hi!"
 
     # Delete Object from GCS
     print("deleting object from GCS")
```