From 7618ec43b3368713f58d0a80d7daa11794420941 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 5 Jul 2024 13:07:09 -0700
Subject: [PATCH] fix(proxy_server.py): support langfuse logging for rejected
 requests on `/v1/chat/completions`

---
 litellm/integrations/opentelemetry.py | 23 +++++++++++----
 litellm/llms/bedrock_httpx.py         |  5 +++-
 litellm/llms/vertex_httpx.py          |  1 +
 litellm/proxy/_new_secret_config.yaml |  6 ++--
 litellm/proxy/proxy_server.py         | 11 +++----
 litellm/proxy/utils.py                | 19 ++++++++++++
 litellm/types/llms/openai.py          | 42 +++++++++++++++------------
 7 files changed, 74 insertions(+), 33 deletions(-)

diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py
index fa7be1d57..122b43444 100644
--- a/litellm/integrations/opentelemetry.py
+++ b/litellm/integrations/opentelemetry.py
@@ -447,13 +447,24 @@ class OpenTelemetry(CustomLogger):
             # cast sr -> dict
             import json
 
-            _raw_response = json.loads(_raw_response)
-            for param, val in _raw_response.items():
-                if not isinstance(val, str):
-                    val = str(val)
+            try:
+                _raw_response = json.loads(_raw_response)
+                for param, val in _raw_response.items():
+                    if not isinstance(val, str):
+                        val = str(val)
+                    span.set_attribute(
+                        f"llm.{custom_llm_provider}.{param}",
+                        val,
+                    )
+            except json.JSONDecodeError:
+                verbose_logger.debug(
+                    "litellm.integrations.opentelemetry.py::set_raw_request_attributes() - raw_response not json string - {}".format(
+                        _raw_response
+                    )
+                )
                 span.set_attribute(
-                    f"llm.{custom_llm_provider}.{param}",
-                    val,
+                    f"llm.{custom_llm_provider}.stringified_raw_response",
+                    _raw_response,
                 )
 
         pass
diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py
index 7b4628a76..b558bac5f 100644
--- a/litellm/llms/bedrock_httpx.py
+++ b/litellm/llms/bedrock_httpx.py
@@ -1394,7 +1394,7 @@ class BedrockConverseLLM(BaseLLM):
         content_str = ""
         tools: List[ChatCompletionToolCallChunk] = []
         if message is not None:
-            for content in message["content"]:
+            for idx, content in enumerate(message["content"]):
                 """
                 - Content is either a tool response or text
                 """
@@ -1409,6 +1409,7 @@
                         id=content["toolUse"]["toolUseId"],
                         type="function",
                         function=_function_chunk,
+                        index=idx,
                     )
                     tools.append(_tool_response_chunk)
         chat_completion_message["content"] = content_str
@@ -2001,6 +2002,7 @@ class AWSEventStreamDecoder:
                         "name": start_obj["toolUse"]["name"],
                         "arguments": "",
                     },
+                    "index": index,
                 }
         elif "delta" in chunk_data:
             delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
@@ -2014,6 +2016,7 @@
                         "name": None,
                         "arguments": delta_obj["toolUse"]["input"],
                     },
+                    "index": index,
                 }
         elif "stopReason" in chunk_data:
             finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index d56237b73..35504a490 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -687,6 +687,7 @@ class VertexLLM(BaseLLM):
                         id=f"call_{str(uuid.uuid4())}",
                         type="function",
                         function=_function_chunk,
+                        index=candidate.get("index", idx),
                     )
                     tools.append(_tool_response_chunk)
 
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index dba1a4682..7f4b86ec4 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,12 +1,12 @@
 model_list:
-  - model_name: claude-3-5-sonnet # all requests where model not in your config go to this deployment
+  - model_name: "*"
     litellm_params:
       model: "openai/*"
       mock_response: "Hello world!"
 
 litellm_settings:
-  callbacks: ["otel"]
-  cache: True
+  success_callback: ["langfuse"]
+  failure_callback: ["langfuse"]
 
 general_settings:
   alerting: ["slack"]
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index f388db4a7..a99f920f9 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -2753,12 +2753,8 @@ async def chat_completion(
         if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]
 
-        ### CALL HOOKS ### - modify/reject incoming data before calling the model
-        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
-            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
-        )
-
         ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
+        ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse.
         data["litellm_call_id"] = str(uuid.uuid4())
         logging_obj, data = litellm.utils.function_setup(
             original_function="acompletion",
@@ -2769,6 +2765,11 @@
 
         data["litellm_logging_obj"] = logging_obj
 
+        ### CALL HOOKS ### - modify/reject incoming data before calling the model
+        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
+            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
+        )
+
         tasks = []
         tasks.append(
             proxy_logging_obj.during_call_hook(
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 179d09466..c492396da 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -31,6 +31,7 @@ from litellm.caching import DualCache, RedisCache
 from litellm.exceptions import RejectedRequestError
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.integrations.slack_alerting import SlackAlerting
+from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy._types import (
     AlertType,
@@ -595,6 +596,23 @@ class ProxyLogging:
             )
         )
 
+        ### LOGGING ###
+        litellm_logging_obj: Optional[Logging] = request_data.get(
+            "litellm_logging_obj", None
+        )
+
+        if (
+            isinstance(original_exception, HTTPException)
+            and litellm_logging_obj is not None
+        ):
+            # log the custom exception
+            await litellm_logging_obj.async_failure_handler(
+                exception=original_exception,
+                traceback_exception=traceback.format_exc(),
+                start_time=time.time(),
+                end_time=time.time(),
+            )
+
         for callback in litellm.callbacks:
             try:
                 _callback: Optional[CustomLogger] = None
@@ -611,6 +629,7 @@
                 )
             except Exception as e:
                 raise e
+        return
 
     async def post_call_success_hook(
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index 88f498ede..64dee3420 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -1,32 +1,37 @@
+from os import PathLike
 from typing import (
-    Optional,
-    Union,
+    IO,
     Any,
     BinaryIO,
-    Literal,
     Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Tuple,
+    TypedDict,
+    Union,
 )
-from typing_extensions import override, Required, Dict
-from pydantic import BaseModel
-from openai.types.beta.threads.message_content import MessageContent
-from openai.types.beta.threads.message import Message as OpenAIMessage
-from openai.types.beta.thread_create_params import (
-    Message as OpenAICreateThreadParamsMessage,
-)
+
+from openai._legacy_response import HttpxBinaryResponseContent
 from openai.lib.streaming._assistants import (
     AssistantEventHandler,
     AssistantStreamManager,
-    AsyncAssistantStreamManager,
     AsyncAssistantEventHandler,
+    AsyncAssistantStreamManager,
 )
-from openai.types.beta.assistant_tool_param import AssistantToolParam
-from openai.types.beta.threads.run import Run
+from openai.pagination import AsyncCursorPage, SyncCursorPage
+from openai.types import Batch, FileObject
 from openai.types.beta.assistant import Assistant
-from openai.pagination import SyncCursorPage, AsyncCursorPage
-from os import PathLike
-from openai.types import FileObject, Batch
-from openai._legacy_response import HttpxBinaryResponseContent
-from typing import TypedDict, List, Optional, Tuple, Mapping, IO
+from openai.types.beta.assistant_tool_param import AssistantToolParam
+from openai.types.beta.thread_create_params import (
+    Message as OpenAICreateThreadParamsMessage,
+)
+from openai.types.beta.threads.message import Message as OpenAIMessage
+from openai.types.beta.threads.message_content import MessageContent
+from openai.types.beta.threads.run import Run
+from pydantic import BaseModel
+from typing_extensions import Dict, Required, override
 
 FileContent = Union[IO[bytes], bytes, PathLike]
 
@@ -304,6 +309,7 @@ class ChatCompletionToolCallChunk(TypedDict):
     id: Optional[str]
    type: Literal["function"]
     function: ChatCompletionToolCallFunctionChunk
+    index: int
 
 
 class ChatCompletionDeltaToolCallChunk(TypedDict):
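
Below is a minimal, self-contained sketch of the ordering this patch enforces in chat_completion(): the per-request logging object is created before the pre-call hooks run, so a request rejected by a hook can still be reported to failure callbacks such as Langfuse. All class and function names in the sketch are illustrative stand-ins, not the actual LiteLLM APIs.

# Illustrative sketch only -- stand-in names, not LiteLLM's real classes.
import asyncio
import time
import traceback
import uuid
from typing import Any, Dict


class RejectedRequestError(Exception):
    """Stand-in for the proxy's rejection exception."""


class RequestLogger:
    """Stand-in for the per-request logging object."""

    def __init__(self, call_id: str) -> None:
        self.call_id = call_id

    async def async_failure_handler(
        self,
        exception: Exception,
        traceback_exception: str,
        start_time: float,
        end_time: float,
    ) -> None:
        # In the real proxy this fans out to failure_callback (e.g. Langfuse).
        print(f"[{self.call_id}] logged rejected request: {exception}")


async def pre_call_hook(data: Dict[str, Any]) -> Dict[str, Any]:
    # A guardrail-style hook that rejects certain prompts.
    if "blocked" in str(data.get("messages", "")):
        raise RejectedRequestError("request rejected by guardrail")
    return data


async def chat_completion(data: Dict[str, Any]) -> Dict[str, Any]:
    start = time.time()
    # 1. Create the logging object first (the key ordering change in this patch).
    logger = RequestLogger(call_id=str(uuid.uuid4()))
    try:
        # 2. Hooks may reject the request; the rejection is still logged.
        data = await pre_call_hook(data)
    except RejectedRequestError as e:
        await logger.async_failure_handler(
            exception=e,
            traceback_exception=traceback.format_exc(),
            start_time=start,
            end_time=time.time(),
        )
        raise
    return {"choices": [{"message": {"role": "assistant", "content": "ok"}}]}


if __name__ == "__main__":
    try:
        asyncio.run(
            chat_completion({"messages": [{"role": "user", "content": "blocked text"}]})
        )
    except RejectedRequestError:
        pass  # the failure was already logged above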