forked from phoenix/litellm-mirror
Merge pull request #5047 from BerriAI/litellm_log_request_response_gcs
[Feat-Proxy] Log request/response on GCS
This commit is contained in:
commit
0214ff5fe0
6 changed files with 153 additions and 105 deletions
|
@ -66,31 +66,47 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
Example payload of a `/chat/completions` request logged on GCS
|
Example payload of a `/chat/completions` request logged on GCS
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"request_id": "chatcmpl-3946ddc2-bcfe-43f6-9b8e-2427951de85c",
|
"request_kwargs": {
|
||||||
"call_type": "acompletion",
|
|
||||||
"api_key": "",
|
|
||||||
"cache_hit": "None",
|
|
||||||
"startTime": "2024-08-01T14:27:12.563246",
|
|
||||||
"endTime": "2024-08-01T14:27:12.572709",
|
|
||||||
"completionStartTime": "2024-08-01T14:27:12.572709",
|
|
||||||
"model": "gpt-3.5-turbo",
|
"model": "gpt-3.5-turbo",
|
||||||
"user": "",
|
"messages": [
|
||||||
"team_id": "",
|
{
|
||||||
"metadata": "{}",
|
"role": "user",
|
||||||
"cache_key": "Cache OFF",
|
"content": "This is a test"
|
||||||
"spend": 0.000054999999999999995,
|
}
|
||||||
"total_tokens": 30,
|
],
|
||||||
"prompt_tokens": 10,
|
"optional_params": {
|
||||||
"completion_tokens": 20,
|
"temperature": 0.7,
|
||||||
"request_tags": "[]",
|
"max_tokens": 10,
|
||||||
"end_user": "ishaan-2",
|
"user": "ishaan-2",
|
||||||
"api_base": "",
|
"extra_body": {}
|
||||||
"model_group": "",
|
}
|
||||||
"model_id": "",
|
},
|
||||||
"requester_ip_address": null,
|
"response_obj": {
|
||||||
"output": [
|
"id": "chatcmpl-bd836a8c-89bc-4abd-bee5-e3f1ebfdb541",
|
||||||
"{\"finish_reason\":\"stop\",\"index\":0,\"message\":{\"content\":\"Hi!\",\"role\":\"assistant\",\"tool_calls\":null,\"function_call\":null}}"
|
"choices": [
|
||||||
]
|
{
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"message": {
|
||||||
|
"content": "Hi!",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null,
|
||||||
|
"function_call": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1722868456,
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"system_fingerprint": null,
|
||||||
|
"usage": {
|
||||||
|
"prompt_tokens": 10,
|
||||||
|
"completion_tokens": 20,
|
||||||
|
"total_tokens": 30
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"start_time": "2024-08-05 07:34:16",
|
||||||
|
"end_time": "2024-08-05 07:34:16"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -76,31 +76,47 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
Example payload of a `/chat/completions` request logged on GCS
|
Example payload of a `/chat/completions` request logged on GCS
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"request_id": "chatcmpl-3946ddc2-bcfe-43f6-9b8e-2427951de85c",
|
"request_kwargs": {
|
||||||
"call_type": "acompletion",
|
|
||||||
"api_key": "",
|
|
||||||
"cache_hit": "None",
|
|
||||||
"startTime": "2024-08-01T14:27:12.563246",
|
|
||||||
"endTime": "2024-08-01T14:27:12.572709",
|
|
||||||
"completionStartTime": "2024-08-01T14:27:12.572709",
|
|
||||||
"model": "gpt-3.5-turbo",
|
"model": "gpt-3.5-turbo",
|
||||||
"user": "",
|
"messages": [
|
||||||
"team_id": "",
|
{
|
||||||
"metadata": "{}",
|
"role": "user",
|
||||||
"cache_key": "Cache OFF",
|
"content": "This is a test"
|
||||||
"spend": 0.000054999999999999995,
|
}
|
||||||
"total_tokens": 30,
|
],
|
||||||
"prompt_tokens": 10,
|
"optional_params": {
|
||||||
"completion_tokens": 20,
|
"temperature": 0.7,
|
||||||
"request_tags": "[]",
|
"max_tokens": 10,
|
||||||
"end_user": "ishaan-2",
|
"user": "ishaan-2",
|
||||||
"api_base": "",
|
"extra_body": {}
|
||||||
"model_group": "",
|
}
|
||||||
"model_id": "",
|
},
|
||||||
"requester_ip_address": null,
|
"response_obj": {
|
||||||
"output": [
|
"id": "chatcmpl-bd836a8c-89bc-4abd-bee5-e3f1ebfdb541",
|
||||||
"{\"finish_reason\":\"stop\",\"index\":0,\"message\":{\"content\":\"Hi!\",\"role\":\"assistant\",\"tool_calls\":null,\"function_call\":null}}"
|
"choices": [
|
||||||
]
|
{
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"message": {
|
||||||
|
"content": "Hi!",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null,
|
||||||
|
"function_call": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1722868456,
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"system_fingerprint": null,
|
||||||
|
"usage": {
|
||||||
|
"prompt_tokens": 10,
|
||||||
|
"completion_tokens": 20,
|
||||||
|
"total_tokens": 30
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"start_time": "2024-08-05 07:34:16",
|
||||||
|
"end_time": "2024-08-05 07:34:16"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -817,9 +817,19 @@ from .utils import (
|
||||||
ModelResponse,
|
ModelResponse,
|
||||||
EmbeddingResponse,
|
EmbeddingResponse,
|
||||||
ImageResponse,
|
ImageResponse,
|
||||||
|
TranscriptionResponse,
|
||||||
|
TextCompletionResponse,
|
||||||
get_provider_fields,
|
get_provider_fields,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
ALL_LITELLM_RESPONSE_TYPES = [
|
||||||
|
ModelResponse,
|
||||||
|
EmbeddingResponse,
|
||||||
|
ImageResponse,
|
||||||
|
TranscriptionResponse,
|
||||||
|
TextCompletionResponse,
|
||||||
|
]
|
||||||
|
|
||||||
from .types.utils import ImageObject
|
from .types.utils import ImageObject
|
||||||
from .llms.custom_llm import CustomLLM
|
from .llms.custom_llm import CustomLLM
|
||||||
from .llms.huggingface_restapi import HuggingfaceConfig
|
from .llms.huggingface_restapi import HuggingfaceConfig
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any, Dict, List, Optional, Union
|
from typing import Any, Dict, List, Optional, TypedDict, Union
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
@ -9,13 +9,24 @@ from pydantic import BaseModel, Field
|
||||||
import litellm
|
import litellm
|
||||||
from litellm._logging import verbose_logger
|
from litellm._logging import verbose_logger
|
||||||
from litellm.integrations.custom_logger import CustomLogger
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
|
from litellm.litellm_core_utils.logging_utils import (
|
||||||
|
convert_litellm_response_object_to_dict,
|
||||||
|
)
|
||||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
||||||
from litellm.proxy._types import CommonProxyErrors, SpendLogsPayload
|
from litellm.proxy._types import CommonProxyErrors, SpendLogsPayload
|
||||||
|
|
||||||
|
|
||||||
class GCSBucketPayload(SpendLogsPayload):
|
class RequestKwargs(TypedDict):
|
||||||
|
model: Optional[str]
|
||||||
messages: Optional[List]
|
messages: Optional[List]
|
||||||
output: Optional[Union[Dict, str, List]]
|
optional_params: Optional[Dict[str, Any]]
|
||||||
|
|
||||||
|
|
||||||
|
class GCSBucketPayload(TypedDict):
|
||||||
|
request_kwargs: Optional[RequestKwargs]
|
||||||
|
response_obj: Optional[Dict]
|
||||||
|
start_time: str
|
||||||
|
end_time: str
|
||||||
|
|
||||||
|
|
||||||
class GCSBucketLogger(CustomLogger):
|
class GCSBucketLogger(CustomLogger):
|
||||||
|
@ -58,12 +69,16 @@ class GCSBucketLogger(CustomLogger):
|
||||||
kwargs,
|
kwargs,
|
||||||
response_obj,
|
response_obj,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
start_time_str = start_time.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
end_time_str = end_time.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
headers = await self.construct_request_headers()
|
headers = await self.construct_request_headers()
|
||||||
|
|
||||||
logging_payload: GCSBucketPayload = await self.get_gcs_payload(
|
logging_payload: GCSBucketPayload = await self.get_gcs_payload(
|
||||||
kwargs, response_obj, start_time, end_time
|
kwargs, response_obj, start_time_str, end_time_str
|
||||||
)
|
)
|
||||||
|
|
||||||
object_name = logging_payload["request_id"]
|
object_name = response_obj["id"]
|
||||||
response = await self.async_httpx_client.post(
|
response = await self.async_httpx_client.post(
|
||||||
headers=headers,
|
headers=headers,
|
||||||
url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}",
|
url=f"https://storage.googleapis.com/upload/storage/v1/b/{self.BUCKET_NAME}/o?uploadType=media&name={object_name}",
|
||||||
|
@ -106,60 +121,23 @@ class GCSBucketLogger(CustomLogger):
|
||||||
async def get_gcs_payload(
|
async def get_gcs_payload(
|
||||||
self, kwargs, response_obj, start_time, end_time
|
self, kwargs, response_obj, start_time, end_time
|
||||||
) -> GCSBucketPayload:
|
) -> GCSBucketPayload:
|
||||||
from litellm.proxy.spend_tracking.spend_tracking_utils import (
|
request_kwargs = RequestKwargs(
|
||||||
get_logging_payload,
|
model=kwargs.get("model", None),
|
||||||
|
messages=kwargs.get("messages", None),
|
||||||
|
optional_params=kwargs.get("optional_params", None),
|
||||||
)
|
)
|
||||||
|
response_dict = {}
|
||||||
spend_logs_payload: SpendLogsPayload = get_logging_payload(
|
response_dict = convert_litellm_response_object_to_dict(
|
||||||
kwargs=kwargs,
|
response_obj=response_obj
|
||||||
response_obj=response_obj,
|
|
||||||
start_time=start_time,
|
|
||||||
end_time=end_time,
|
|
||||||
end_user_id=kwargs.get("user"),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
gcs_payload: GCSBucketPayload = GCSBucketPayload(
|
gcs_payload: GCSBucketPayload = GCSBucketPayload(
|
||||||
**spend_logs_payload, messages=None, output=None
|
request_kwargs=request_kwargs,
|
||||||
|
response_obj=response_dict,
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
)
|
)
|
||||||
gcs_payload["messages"] = kwargs.get("messages", None)
|
|
||||||
gcs_payload["startTime"] = start_time.isoformat()
|
|
||||||
gcs_payload["endTime"] = end_time.isoformat()
|
|
||||||
|
|
||||||
if gcs_payload["completionStartTime"] is not None:
|
|
||||||
gcs_payload["completionStartTime"] = gcs_payload[ # type: ignore
|
|
||||||
"completionStartTime" # type: ignore
|
|
||||||
].isoformat()
|
|
||||||
|
|
||||||
output = None
|
|
||||||
if response_obj is not None and (
|
|
||||||
kwargs.get("call_type", None) == "embedding"
|
|
||||||
or isinstance(response_obj, litellm.EmbeddingResponse)
|
|
||||||
):
|
|
||||||
output = None
|
|
||||||
elif response_obj is not None and isinstance(
|
|
||||||
response_obj, litellm.ModelResponse
|
|
||||||
):
|
|
||||||
output_list = []
|
|
||||||
for choice in response_obj.choices:
|
|
||||||
output_list.append(choice.json())
|
|
||||||
output = output_list
|
|
||||||
elif response_obj is not None and isinstance(
|
|
||||||
response_obj, litellm.TextCompletionResponse
|
|
||||||
):
|
|
||||||
output_list = []
|
|
||||||
for choice in response_obj.choices:
|
|
||||||
output_list.append(choice.json())
|
|
||||||
output = output_list
|
|
||||||
elif response_obj is not None and isinstance(
|
|
||||||
response_obj, litellm.ImageResponse
|
|
||||||
):
|
|
||||||
output = response_obj["data"]
|
|
||||||
elif response_obj is not None and isinstance(
|
|
||||||
response_obj, litellm.TranscriptionResponse
|
|
||||||
):
|
|
||||||
output = response_obj["text"]
|
|
||||||
|
|
||||||
gcs_payload["output"] = output
|
|
||||||
return gcs_payload
|
return gcs_payload
|
||||||
|
|
||||||
async def download_gcs_object(self, object_name):
|
async def download_gcs_object(self, object_name):
|
||||||
|
|
22
litellm/litellm_core_utils/logging_utils.py
Normal file
22
litellm/litellm_core_utils/logging_utils.py
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
"""
|
||||||
|
Helper utils used for logging callbacks
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def convert_litellm_response_object_to_dict(response_obj: Any) -> dict:
|
||||||
|
"""
|
||||||
|
Convert a LiteLLM response object to a dictionary
|
||||||
|
|
||||||
|
"""
|
||||||
|
if isinstance(response_obj, dict):
|
||||||
|
return response_obj
|
||||||
|
for _type in litellm.ALL_LITELLM_RESPONSE_TYPES:
|
||||||
|
if isinstance(response_obj, _type):
|
||||||
|
return response_obj.model_dump()
|
||||||
|
|
||||||
|
# If it's not a LiteLLM type, fall back to converting it with dict()
|
||||||
|
return dict(response_obj)
|
|
@ -15,7 +15,7 @@ import pytest
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import completion
|
from litellm import completion
|
||||||
from litellm._logging import verbose_logger
|
from litellm._logging import verbose_logger
|
||||||
from litellm.integrations.gcs_bucket import GCSBucketLogger
|
from litellm.integrations.gcs_bucket import GCSBucketLogger, GCSBucketPayload
|
||||||
|
|
||||||
verbose_logger.setLevel(logging.DEBUG)
|
verbose_logger.setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
@ -87,9 +87,15 @@ async def test_basic_gcs_logger():
|
||||||
object_from_gcs = json.loads(object_from_gcs)
|
object_from_gcs = json.loads(object_from_gcs)
|
||||||
print("object_from_gcs", object_from_gcs)
|
print("object_from_gcs", object_from_gcs)
|
||||||
|
|
||||||
assert object_from_gcs["request_id"] == response.id
|
gcs_payload = GCSBucketPayload(**object_from_gcs)
|
||||||
assert object_from_gcs["call_type"] == "acompletion"
|
|
||||||
assert object_from_gcs["model"] == "gpt-3.5-turbo"
|
print("gcs_payload", gcs_payload)
|
||||||
|
|
||||||
|
assert gcs_payload["request_kwargs"]["model"] == "gpt-3.5-turbo"
|
||||||
|
assert gcs_payload["request_kwargs"]["messages"] == [
|
||||||
|
{"role": "user", "content": "This is a test"}
|
||||||
|
]
|
||||||
|
assert gcs_payload["response_obj"]["choices"][0]["message"]["content"] == "Hi!"
|
||||||
|
|
||||||
# Delete Object from GCS
|
# Delete Object from GCS
|
||||||
print("deleting object from GCS")
|
print("deleting object from GCS")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue