fix(proxy_server.py): support langfuse logging for rejected requests on /v1/chat/completions

Krrish Dholakia 2024-07-05 13:07:09 -07:00
parent 017af34866
commit 7618ec43b3
7 changed files with 74 additions and 33 deletions


@@ -447,13 +447,24 @@ class OpenTelemetry(CustomLogger):
             # cast sr -> dict
             import json
 
-            _raw_response = json.loads(_raw_response)
-            for param, val in _raw_response.items():
-                if not isinstance(val, str):
-                    val = str(val)
+            try:
+                _raw_response = json.loads(_raw_response)
+                for param, val in _raw_response.items():
+                    if not isinstance(val, str):
+                        val = str(val)
+                    span.set_attribute(
+                        f"llm.{custom_llm_provider}.{param}",
+                        val,
+                    )
+            except json.JSONDecodeError:
+                verbose_logger.debug(
+                    "litellm.integrations.opentelemetry.py::set_raw_request_attributes() - raw_response not json string - {}".format(
+                        _raw_response
+                    )
+                )
                 span.set_attribute(
-                    f"llm.{custom_llm_provider}.{param}",
-                    val,
+                    f"llm.{custom_llm_provider}.stringified_raw_response",
+                    _raw_response,
                 )
 
         pass
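
The effect of this change: when the provider's raw response is not a JSON string, the OpenTelemetry integration no longer raises from json.loads; it logs the problem and records the whole string under a single attribute. A minimal standalone sketch of the same pattern (the helper name and the bare span parameter are illustrative, not litellm's API):

import json

def set_raw_response_attributes(span, custom_llm_provider: str, _raw_response: str) -> None:
    # If the raw response is valid JSON, expand it into one span attribute per field.
    try:
        parsed = json.loads(_raw_response)
        for param, val in parsed.items():
            if not isinstance(val, str):
                val = str(val)
            span.set_attribute(f"llm.{custom_llm_provider}.{param}", val)
    except json.JSONDecodeError:
        # Not JSON (e.g. plain text): keep the whole string on one attribute
        # instead of failing the logging call.
        span.set_attribute(
            f"llm.{custom_llm_provider}.stringified_raw_response", _raw_response
        )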


@@ -1394,7 +1394,7 @@ class BedrockConverseLLM(BaseLLM):
         content_str = ""
         tools: List[ChatCompletionToolCallChunk] = []
         if message is not None:
-            for content in message["content"]:
+            for idx, content in enumerate(message["content"]):
                 """
                 - Content is either a tool response or text
                 """
@@ -1409,6 +1409,7 @@ class BedrockConverseLLM(BaseLLM):
                         id=content["toolUse"]["toolUseId"],
                         type="function",
                         function=_function_chunk,
+                        index=idx,
                     )
                     tools.append(_tool_response_chunk)
         chat_completion_message["content"] = content_str
@@ -2001,6 +2002,7 @@ class AWSEventStreamDecoder:
                         "name": start_obj["toolUse"]["name"],
                         "arguments": "",
                     },
+                    "index": index,
                 }
         elif "delta" in chunk_data:
             delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
@@ -2014,6 +2016,7 @@ class AWSEventStreamDecoder:
                         "name": None,
                         "arguments": delta_obj["toolUse"]["input"],
                     },
+                    "index": index,
                 }
         elif "stopReason" in chunk_data:
             finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))


@@ -687,6 +687,7 @@ class VertexLLM(BaseLLM):
                         id=f"call_{str(uuid.uuid4())}",
                         type="function",
                         function=_function_chunk,
+                        index=candidate.get("index", idx),
                     )
                     tools.append(_tool_response_chunk)
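
Both the Bedrock and Vertex parsers now attach an index to every tool call they emit, mirroring the OpenAI chat-completions format in which streamed tool-call deltas are correlated by index rather than by id. A small illustrative sketch (the chunk dicts below are made up, not output from the classes above) of why a consumer needs that field when reassembling a streamed tool call:

from typing import Dict

# Hypothetical streamed tool-call deltas in OpenAI format; only the first chunk
# for a given tool call carries id/name, later chunks carry argument fragments
# and are matched to the right call by "index".
chunks = [
    {"index": 0, "id": "call_abc", "function": {"name": "get_weather", "arguments": ""}},
    {"index": 0, "id": None, "function": {"name": None, "arguments": '{"city": '}},
    {"index": 0, "id": None, "function": {"name": None, "arguments": '"Paris"}'}},
]

calls: Dict[int, dict] = {}
for chunk in chunks:
    call = calls.setdefault(chunk["index"], {"id": None, "name": None, "arguments": ""})
    if chunk["id"]:
        call["id"] = chunk["id"]
    if chunk["function"]["name"]:
        call["name"] = chunk["function"]["name"]
    call["arguments"] += chunk["function"]["arguments"]

print(calls)  # {0: {'id': 'call_abc', 'name': 'get_weather', 'arguments': '{"city": "Paris"}'}}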


@@ -1,12 +1,12 @@
 model_list:
-  - model_name: claude-3-5-sonnet # all requests where model not in your config go to this deployment
+  - model_name: "*"
     litellm_params:
       model: "openai/*"
       mock_response: "Hello world!"
 
 litellm_settings:
-  callbacks: ["otel"]
-  cache: True
+  success_callback: ["langfuse"]
+  failure_callback: ["langfuse"]
 
 general_settings:
   alerting: ["slack"]
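
With success_callback and failure_callback both pointed at Langfuse, successful completions and rejected requests alike should end up in Langfuse (the integration also expects LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY in the proxy's environment). A quick way to exercise this config against a locally running proxy; the address and API key below are assumptions for the example, not part of this commit:

import openai

# Point the OpenAI client at the LiteLLM proxy (default local address assumed).
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="my-model",  # any name matches the "*" wildcard deployment above
    messages=[{"role": "user", "content": "hi"}],
)
print(response.choices[0].message.content)  # "Hello world!" via mock_response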


@@ -2753,12 +2753,8 @@ async def chat_completion(
         if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]
 
-        ### CALL HOOKS ### - modify/reject incoming data before calling the model
-        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
-            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
-        )
-
         ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
+        ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse.
         data["litellm_call_id"] = str(uuid.uuid4())
         logging_obj, data = litellm.utils.function_setup(
             original_function="acompletion",
@@ -2769,6 +2765,11 @@ async def chat_completion(
         data["litellm_logging_obj"] = logging_obj
 
+        ### CALL HOOKS ### - modify/reject incoming data before calling the model
+        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
+            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
+        )
+
         tasks = []
         tasks.append(
             proxy_logging_obj.during_call_hook(
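
The reason for moving the pre-call hook below the logging setup is the one stated in the new comment: the per-request logging object has to exist before any pre-call check can reject the request, otherwise there is nothing to report the rejection through. A simplified sketch of the resulting control flow (function and parameter names here are hypothetical, not the proxy's real signatures):

import uuid

async def handle_chat_completion(data: dict, hooks, logging_factory):
    # 1. Build the logging object first, so even a rejected request has
    #    something its failure can be logged against (e.g. to Langfuse).
    data["litellm_call_id"] = str(uuid.uuid4())
    logging_obj = logging_factory(call_id=data["litellm_call_id"], request_data=data)
    data["litellm_logging_obj"] = logging_obj

    # 2. Only now run the pre-call hooks; these may raise to reject the call.
    data = await hooks.pre_call_hook(data=data, call_type="completion")

    # 3. Forward the (possibly modified) request to the model.
    return await hooks.route_request(data=data)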


@@ -31,6 +31,7 @@ from litellm.caching import DualCache, RedisCache
 from litellm.exceptions import RejectedRequestError
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.integrations.slack_alerting import SlackAlerting
+from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy._types import (
     AlertType,
@@ -595,6 +596,23 @@ class ProxyLogging:
                 )
             )
 
+        ### LOGGING ###
+        litellm_logging_obj: Optional[Logging] = request_data.get(
+            "litellm_logging_obj", None
+        )
+
+        if (
+            isinstance(original_exception, HTTPException)
+            and litellm_logging_obj is not None
+        ):
+            # log the custom exception
+            await litellm_logging_obj.async_failure_handler(
+                exception=original_exception,
+                traceback_exception=traceback.format_exc(),
+                start_time=time.time(),
+                end_time=time.time(),
+            )
+
         for callback in litellm.callbacks:
             try:
                 _callback: Optional[CustomLogger] = None
@@ -611,6 +629,7 @@ class ProxyLogging:
                     )
             except Exception as e:
                 raise e
+        return
 
     async def post_call_success_hook(
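
Because the rejection is now pushed through async_failure_handler, every registered failure callback sees it: Langfuse via the config above, or a user-defined CustomLogger. A minimal sketch of such a callback; the async_log_failure_event hook name is taken from litellm's CustomLogger interface as I understand it, and the keys read from kwargs are assumptions:

from litellm.integrations.custom_logger import CustomLogger

class RejectionAuditLogger(CustomLogger):
    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        # Fired for provider errors and, after this change, also for requests
        # rejected by pre-call checks on /v1/chat/completions.
        print(
            "failed call:",
            kwargs.get("litellm_call_id"),
            kwargs.get("exception"),
        )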


@@ -1,32 +1,37 @@
+from os import PathLike
 from typing import (
-    Optional,
-    Union,
     IO,
     Any,
     BinaryIO,
-    Literal,
     Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Tuple,
+    TypedDict,
+    Union,
 )
-from typing_extensions import override, Required, Dict
-from pydantic import BaseModel
-from openai.types.beta.threads.message_content import MessageContent
-from openai.types.beta.threads.message import Message as OpenAIMessage
-from openai.types.beta.thread_create_params import (
-    Message as OpenAICreateThreadParamsMessage,
-)
+
+from openai._legacy_response import HttpxBinaryResponseContent
 from openai.lib.streaming._assistants import (
     AssistantEventHandler,
     AssistantStreamManager,
-    AsyncAssistantStreamManager,
     AsyncAssistantEventHandler,
+    AsyncAssistantStreamManager,
 )
-from openai.types.beta.assistant_tool_param import AssistantToolParam
-from openai.types.beta.threads.run import Run
+from openai.pagination import AsyncCursorPage, SyncCursorPage
+from openai.types import Batch, FileObject
 from openai.types.beta.assistant import Assistant
-from openai.pagination import SyncCursorPage, AsyncCursorPage
-from os import PathLike
-from openai.types import FileObject, Batch
-from openai._legacy_response import HttpxBinaryResponseContent
-from typing import TypedDict, List, Optional, Tuple, Mapping, IO
+from openai.types.beta.assistant_tool_param import AssistantToolParam
+from openai.types.beta.thread_create_params import (
+    Message as OpenAICreateThreadParamsMessage,
+)
+from openai.types.beta.threads.message import Message as OpenAIMessage
+from openai.types.beta.threads.message_content import MessageContent
+from openai.types.beta.threads.run import Run
+from pydantic import BaseModel
+from typing_extensions import Dict, Required, override
 
 FileContent = Union[IO[bytes], bytes, PathLike]
@@ -304,6 +309,7 @@ class ChatCompletionToolCallChunk(TypedDict):
     id: Optional[str]
     type: Literal["function"]
    function: ChatCompletionToolCallFunctionChunk
+    index: int
 
 
 class ChatCompletionDeltaToolCallChunk(TypedDict):