From 7618ec43b3368713f58d0a80d7daa11794420941 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 5 Jul 2024 13:07:09 -0700
Subject: [PATCH] fix(proxy_server.py): support langfuse logging for rejected
 requests on `/v1/chat/completions`

---
 litellm/integrations/opentelemetry.py | 23 +++++++++++----
 litellm/llms/bedrock_httpx.py         |  5 +++-
 litellm/llms/vertex_httpx.py          |  1 +
 litellm/proxy/_new_secret_config.yaml |  6 ++--
 litellm/proxy/proxy_server.py         | 11 +++----
 litellm/proxy/utils.py                | 19 ++++++++++++
 litellm/types/llms/openai.py          | 42 +++++++++++++++------------
 7 files changed, 74 insertions(+), 33 deletions(-)

diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py
index fa7be1d57..122b43444 100644
--- a/litellm/integrations/opentelemetry.py
+++ b/litellm/integrations/opentelemetry.py
@@ -447,13 +447,24 @@ class OpenTelemetry(CustomLogger):
             # cast sr -> dict
             import json
 
-            _raw_response = json.loads(_raw_response)
-            for param, val in _raw_response.items():
-                if not isinstance(val, str):
-                    val = str(val)
+            try:
+                _raw_response = json.loads(_raw_response)
+                for param, val in _raw_response.items():
+                    if not isinstance(val, str):
+                        val = str(val)
+                    span.set_attribute(
+                        f"llm.{custom_llm_provider}.{param}",
+                        val,
+                    )
+            except json.JSONDecodeError:
+                verbose_logger.debug(
+                    "litellm.integrations.opentelemetry.py::set_raw_request_attributes() - raw_response not json string - {}".format(
+                        _raw_response
+                    )
+                )
                 span.set_attribute(
-                    f"llm.{custom_llm_provider}.{param}",
-                    val,
+                    f"llm.{custom_llm_provider}.stringified_raw_response",
+                    _raw_response,
                 )
 
         pass
diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py
index 7b4628a76..b558bac5f 100644
--- a/litellm/llms/bedrock_httpx.py
+++ b/litellm/llms/bedrock_httpx.py
@@ -1394,7 +1394,7 @@ class BedrockConverseLLM(BaseLLM):
         content_str = ""
         tools: List[ChatCompletionToolCallChunk] = []
         if message is not None:
-            for content in message["content"]:
+            for idx, content in enumerate(message["content"]):
                 """
                 - Content is either a tool response or text
                 """
@@ -1409,6 +1409,7 @@
                         id=content["toolUse"]["toolUseId"],
                         type="function",
                         function=_function_chunk,
+                        index=idx,
                     )
                     tools.append(_tool_response_chunk)
         chat_completion_message["content"] = content_str
@@ -2001,6 +2002,7 @@ class AWSEventStreamDecoder:
                         "name": start_obj["toolUse"]["name"],
                         "arguments": "",
                     },
+                    "index": index,
                 }
         elif "delta" in chunk_data:
             delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
@@ -2014,6 +2016,7 @@
                         "name": None,
                         "arguments": delta_obj["toolUse"]["input"],
                     },
+                    "index": index,
                 }
         elif "stopReason" in chunk_data:
             finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index d56237b73..35504a490 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -687,6 +687,7 @@ class VertexLLM(BaseLLM):
                         id=f"call_{str(uuid.uuid4())}",
                         type="function",
                         function=_function_chunk,
+                        index=candidate.get("index", idx),
                     )
                     tools.append(_tool_response_chunk)
 
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index dba1a4682..7f4b86ec4 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,12 +1,12 @@
 model_list:
-  - model_name: claude-3-5-sonnet # all requests where model not in your config go to this deployment
+  - model_name: "*"
     litellm_params:
       model: "openai/*"
       mock_response: "Hello world!"
 
 litellm_settings:
-  callbacks: ["otel"]
-  cache: True
+  success_callback: ["langfuse"]
+  failure_callback: ["langfuse"]
 
 general_settings:
   alerting: ["slack"]
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index f388db4a7..a99f920f9 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -2753,12 +2753,8 @@ async def chat_completion(
         if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]
 
-        ### CALL HOOKS ### - modify/reject incoming data before calling the model
-        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
-            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
-        )
-
         ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
+        ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse.
         data["litellm_call_id"] = str(uuid.uuid4())
         logging_obj, data = litellm.utils.function_setup(
             original_function="acompletion",
@@ -2769,6 +2765,11 @@
 
         data["litellm_logging_obj"] = logging_obj
 
+        ### CALL HOOKS ### - modify/reject incoming data before calling the model
+        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
+            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
+        )
+
         tasks = []
         tasks.append(
             proxy_logging_obj.during_call_hook(
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 179d09466..c492396da 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -31,6 +31,7 @@ from litellm.caching import DualCache, RedisCache
 from litellm.exceptions import RejectedRequestError
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.integrations.slack_alerting import SlackAlerting
+from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy._types import (
     AlertType,
@@ -595,6 +596,23 @@ class ProxyLogging:
             )
         )
 
+        ### LOGGING ###
+        litellm_logging_obj: Optional[Logging] = request_data.get(
+            "litellm_logging_obj", None
+        )
+
+        if (
+            isinstance(original_exception, HTTPException)
+            and litellm_logging_obj is not None
+        ):
+            # log the custom exception
+            await litellm_logging_obj.async_failure_handler(
+                exception=original_exception,
+                traceback_exception=traceback.format_exc(),
+                start_time=time.time(),
+                end_time=time.time(),
+            )
+
         for callback in litellm.callbacks:
             try:
                 _callback: Optional[CustomLogger] = None
@@ -611,6 +629,7 @@
                 )
             except Exception as e:
                 raise e
+        return
 
     async def post_call_success_hook(
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index 88f498ede..64dee3420 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -1,32 +1,37 @@
+from os import PathLike
 from typing import (
-    Optional,
-    Union,
+    IO,
     Any,
     BinaryIO,
-    Literal,
     Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Tuple,
+    TypedDict,
+    Union,
 )
-from typing_extensions import override, Required, Dict
-from pydantic import BaseModel
-from openai.types.beta.threads.message_content import MessageContent
-from openai.types.beta.threads.message import Message as OpenAIMessage
-from openai.types.beta.thread_create_params import (
-    Message as OpenAICreateThreadParamsMessage,
-)
+
+from openai._legacy_response import HttpxBinaryResponseContent
 from openai.lib.streaming._assistants import (
     AssistantEventHandler,
     AssistantStreamManager,
-    AsyncAssistantStreamManager,
     AsyncAssistantEventHandler,
+    AsyncAssistantStreamManager,
 )
-from openai.types.beta.assistant_tool_param import AssistantToolParam
-from openai.types.beta.threads.run import Run
+from openai.pagination import AsyncCursorPage, SyncCursorPage
+from openai.types import Batch, FileObject
 from openai.types.beta.assistant import Assistant
-from openai.pagination import SyncCursorPage, AsyncCursorPage
-from os import PathLike
-from openai.types import FileObject, Batch
-from openai._legacy_response import HttpxBinaryResponseContent
-from typing import TypedDict, List, Optional, Tuple, Mapping, IO
+from openai.types.beta.assistant_tool_param import AssistantToolParam
+from openai.types.beta.thread_create_params import (
+    Message as OpenAICreateThreadParamsMessage,
+)
+from openai.types.beta.threads.message import Message as OpenAIMessage
+from openai.types.beta.threads.message_content import MessageContent
+from openai.types.beta.threads.run import Run
+from pydantic import BaseModel
+from typing_extensions import Dict, Required, override
 
 FileContent = Union[IO[bytes], bytes, PathLike]
 
@@ -304,6 +309,7 @@ class ChatCompletionToolCallChunk(TypedDict):
     id: Optional[str]
    type: Literal["function"]
     function: ChatCompletionToolCallFunctionChunk
+    index: int
 
 
 class ChatCompletionDeltaToolCallChunk(TypedDict):
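
Below is a minimal, self-contained sketch of the ordering this patch enforces in chat_completion(): the per-request logging object is created before the pre-call hooks run, so a request rejected by a hook can still be reported to failure callbacks such as Langfuse. All class and function names in the sketch are illustrative stand-ins, not the actual LiteLLM APIs.

# Illustrative sketch only -- stand-in names, not LiteLLM's real classes.
import asyncio
import time
import traceback
import uuid
from typing import Any, Dict


class RejectedRequestError(Exception):
    """Stand-in for the proxy's rejection exception."""


class RequestLogger:
    """Stand-in for the per-request logging object."""

    def __init__(self, call_id: str) -> None:
        self.call_id = call_id

    async def async_failure_handler(
        self,
        exception: Exception,
        traceback_exception: str,
        start_time: float,
        end_time: float,
    ) -> None:
        # In the real proxy this fans out to failure_callback (e.g. Langfuse).
        print(f"[{self.call_id}] logged rejected request: {exception}")


async def pre_call_hook(data: Dict[str, Any]) -> Dict[str, Any]:
    # A guardrail-style hook that rejects certain prompts.
    if "blocked" in str(data.get("messages", "")):
        raise RejectedRequestError("request rejected by guardrail")
    return data


async def chat_completion(data: Dict[str, Any]) -> Dict[str, Any]:
    start = time.time()
    # 1. Create the logging object first (the key ordering change in this patch).
    logger = RequestLogger(call_id=str(uuid.uuid4()))
    try:
        # 2. Hooks may reject the request; the rejection is still logged.
        data = await pre_call_hook(data)
    except RejectedRequestError as e:
        await logger.async_failure_handler(
            exception=e,
            traceback_exception=traceback.format_exc(),
            start_time=start,
            end_time=time.time(),
        )
        raise
    return {"choices": [{"message": {"role": "assistant", "content": "ok"}}]}


if __name__ == "__main__":
    try:
        asyncio.run(
            chat_completion({"messages": [{"role": "user", "content": "blocked text"}]})
        )
    except RejectedRequestError:
        pass  # the failure was already logged above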