fix(proxy_server.py): support langfuse logging for rejected requests on /v1/chat/completions

Krrish Dholakia 2024-07-05 13:07:09 -07:00
parent 017af34866
commit 7618ec43b3
7 changed files with 74 additions and 33 deletions


@@ -447,13 +447,24 @@ class OpenTelemetry(CustomLogger):
             # cast sr -> dict
             import json
 
-            _raw_response = json.loads(_raw_response)
-            for param, val in _raw_response.items():
-                if not isinstance(val, str):
-                    val = str(val)
+            try:
+                _raw_response = json.loads(_raw_response)
+                for param, val in _raw_response.items():
+                    if not isinstance(val, str):
+                        val = str(val)
+                    span.set_attribute(
+                        f"llm.{custom_llm_provider}.{param}",
+                        val,
+                    )
+            except json.JSONDecodeError:
+                verbose_logger.debug(
+                    "litellm.integrations.opentelemetry.py::set_raw_request_attributes() - raw_response not json string - {}".format(
+                        _raw_response
+                    )
+                )
                 span.set_attribute(
-                    f"llm.{custom_llm_provider}.{param}",
-                    val,
+                    f"llm.{custom_llm_provider}.stringified_raw_response",
+                    _raw_response,
                 )
 
         pass
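
The effect of this change: when the provider's raw response is not a JSON string, the OpenTelemetry integration no longer raises from json.loads; it logs the problem and records the whole string under a single attribute. A minimal standalone sketch of the same pattern (the helper name and the bare span parameter are illustrative, not litellm's API):

import json

def set_raw_response_attributes(span, custom_llm_provider: str, _raw_response: str) -> None:
    # If the raw response is valid JSON, expand it into one span attribute per field.
    try:
        parsed = json.loads(_raw_response)
        for param, val in parsed.items():
            if not isinstance(val, str):
                val = str(val)
            span.set_attribute(f"llm.{custom_llm_provider}.{param}", val)
    except json.JSONDecodeError:
        # Not JSON (e.g. plain text): keep the whole string on one attribute
        # instead of failing the logging call.
        span.set_attribute(
            f"llm.{custom_llm_provider}.stringified_raw_response", _raw_response
        )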


@@ -1394,7 +1394,7 @@ class BedrockConverseLLM(BaseLLM):
         content_str = ""
         tools: List[ChatCompletionToolCallChunk] = []
         if message is not None:
-            for content in message["content"]:
+            for idx, content in enumerate(message["content"]):
                 """
                 - Content is either a tool response or text
                 """
@@ -1409,6 +1409,7 @@ class BedrockConverseLLM(BaseLLM):
                         id=content["toolUse"]["toolUseId"],
                         type="function",
                         function=_function_chunk,
+                        index=idx,
                     )
                     tools.append(_tool_response_chunk)
         chat_completion_message["content"] = content_str
@@ -2001,6 +2002,7 @@ class AWSEventStreamDecoder:
                         "name": start_obj["toolUse"]["name"],
                         "arguments": "",
                     },
+                    "index": index,
                 }
         elif "delta" in chunk_data:
             delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
@@ -2014,6 +2016,7 @@ class AWSEventStreamDecoder:
                         "name": None,
                         "arguments": delta_obj["toolUse"]["input"],
                     },
+                    "index": index,
                 }
         elif "stopReason" in chunk_data:
             finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))


@@ -687,6 +687,7 @@ class VertexLLM(BaseLLM):
                         id=f"call_{str(uuid.uuid4())}",
                         type="function",
                         function=_function_chunk,
+                        index=candidate.get("index", idx),
                     )
                     tools.append(_tool_response_chunk)
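
Both the Bedrock and Vertex parsers now attach an index to every tool call they emit, mirroring the OpenAI chat-completions format in which streamed tool-call deltas are correlated by index rather than by id. A small illustrative sketch (the chunk dicts below are made up, not output from the classes above) of why a consumer needs that field when reassembling a streamed tool call:

from typing import Dict

# Hypothetical streamed tool-call deltas in OpenAI format; only the first chunk
# for a given tool call carries id/name, later chunks carry argument fragments
# and are matched to the right call by "index".
chunks = [
    {"index": 0, "id": "call_abc", "function": {"name": "get_weather", "arguments": ""}},
    {"index": 0, "id": None, "function": {"name": None, "arguments": '{"city": '}},
    {"index": 0, "id": None, "function": {"name": None, "arguments": '"Paris"}'}},
]

calls: Dict[int, dict] = {}
for chunk in chunks:
    call = calls.setdefault(chunk["index"], {"id": None, "name": None, "arguments": ""})
    if chunk["id"]:
        call["id"] = chunk["id"]
    if chunk["function"]["name"]:
        call["name"] = chunk["function"]["name"]
    call["arguments"] += chunk["function"]["arguments"]

print(calls)  # {0: {'id': 'call_abc', 'name': 'get_weather', 'arguments': '{"city": "Paris"}'}}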


@@ -1,12 +1,12 @@
 model_list:
-  - model_name: claude-3-5-sonnet # all requests where model not in your config go to this deployment
+  - model_name: "*"
     litellm_params:
       model: "openai/*"
       mock_response: "Hello world!"
 
 litellm_settings:
-  callbacks: ["otel"]
-  cache: True
+  success_callback: ["langfuse"]
+  failure_callback: ["langfuse"]
 
 general_settings:
   alerting: ["slack"]
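
With success_callback and failure_callback both pointed at Langfuse, successful completions and rejected requests alike should end up in Langfuse (the integration also expects LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY in the proxy's environment). A quick way to exercise this config against a locally running proxy; the address and API key below are assumptions for the example, not part of this commit:

import openai

# Point the OpenAI client at the LiteLLM proxy (default local address assumed).
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="my-model",  # any name matches the "*" wildcard deployment above
    messages=[{"role": "user", "content": "hi"}],
)
print(response.choices[0].message.content)  # "Hello world!" via mock_response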


@@ -2753,12 +2753,8 @@ async def chat_completion(
         if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]
 
-        ### CALL HOOKS ### - modify/reject incoming data before calling the model
-        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
-            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
-        )
-
         ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
+        ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse.
         data["litellm_call_id"] = str(uuid.uuid4())
         logging_obj, data = litellm.utils.function_setup(
             original_function="acompletion",
@@ -2769,6 +2765,11 @@ async def chat_completion(
         data["litellm_logging_obj"] = logging_obj
 
+        ### CALL HOOKS ### - modify/reject incoming data before calling the model
+        data = await proxy_logging_obj.pre_call_hook(  # type: ignore
+            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
+        )
+
         tasks = []
         tasks.append(
             proxy_logging_obj.during_call_hook(
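
The reason for moving the pre-call hook below the logging setup is the one stated in the new comment: the per-request logging object has to exist before any pre-call check can reject the request, otherwise there is nothing to report the rejection through. A simplified sketch of the resulting control flow (function and parameter names here are hypothetical, not the proxy's real signatures):

import uuid

async def handle_chat_completion(data: dict, hooks, logging_factory):
    # 1. Build the logging object first, so even a rejected request has
    #    something its failure can be logged against (e.g. to Langfuse).
    data["litellm_call_id"] = str(uuid.uuid4())
    logging_obj = logging_factory(call_id=data["litellm_call_id"], request_data=data)
    data["litellm_logging_obj"] = logging_obj

    # 2. Only now run the pre-call hooks; these may raise to reject the call.
    data = await hooks.pre_call_hook(data=data, call_type="completion")

    # 3. Forward the (possibly modified) request to the model.
    return await hooks.route_request(data=data)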


@@ -31,6 +31,7 @@ from litellm.caching import DualCache, RedisCache
 from litellm.exceptions import RejectedRequestError
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.integrations.slack_alerting import SlackAlerting
+from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy._types import (
     AlertType,
@@ -595,6 +596,23 @@ class ProxyLogging:
                 )
             )
 
+        ### LOGGING ###
+        litellm_logging_obj: Optional[Logging] = request_data.get(
+            "litellm_logging_obj", None
+        )
+
+        if (
+            isinstance(original_exception, HTTPException)
+            and litellm_logging_obj is not None
+        ):
+            # log the custom exception
+            await litellm_logging_obj.async_failure_handler(
+                exception=original_exception,
+                traceback_exception=traceback.format_exc(),
+                start_time=time.time(),
+                end_time=time.time(),
+            )
+
         for callback in litellm.callbacks:
             try:
                 _callback: Optional[CustomLogger] = None
@@ -611,6 +629,7 @@ class ProxyLogging:
                     )
             except Exception as e:
                 raise e
+        return
 
     async def post_call_success_hook(
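
Because the rejection is now pushed through async_failure_handler, every registered failure callback sees it: Langfuse via the config above, or a user-defined CustomLogger. A minimal sketch of such a callback; the async_log_failure_event hook name is taken from litellm's CustomLogger interface as I understand it, and the keys read from kwargs are assumptions:

from litellm.integrations.custom_logger import CustomLogger

class RejectionAuditLogger(CustomLogger):
    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        # Fired for provider errors and, after this change, also for requests
        # rejected by pre-call checks on /v1/chat/completions.
        print(
            "failed call:",
            kwargs.get("litellm_call_id"),
            kwargs.get("exception"),
        )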


@@ -1,32 +1,37 @@
+from os import PathLike
 from typing import (
-    Optional,
-    Union,
     IO,
     Any,
     BinaryIO,
-    Literal,
     Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Tuple,
+    TypedDict,
+    Union,
 )
-from typing_extensions import override, Required, Dict
-from pydantic import BaseModel
-from openai.types.beta.threads.message_content import MessageContent
-from openai.types.beta.threads.message import Message as OpenAIMessage
-from openai.types.beta.thread_create_params import (
-    Message as OpenAICreateThreadParamsMessage,
-)
+
+from openai._legacy_response import HttpxBinaryResponseContent
 from openai.lib.streaming._assistants import (
     AssistantEventHandler,
     AssistantStreamManager,
-    AsyncAssistantStreamManager,
     AsyncAssistantEventHandler,
+    AsyncAssistantStreamManager,
 )
-from openai.types.beta.assistant_tool_param import AssistantToolParam
-from openai.types.beta.threads.run import Run
+from openai.pagination import AsyncCursorPage, SyncCursorPage
+from openai.types import Batch, FileObject
 from openai.types.beta.assistant import Assistant
-from openai.pagination import SyncCursorPage, AsyncCursorPage
-from os import PathLike
-from openai.types import FileObject, Batch
-from openai._legacy_response import HttpxBinaryResponseContent
-from typing import TypedDict, List, Optional, Tuple, Mapping, IO
+from openai.types.beta.assistant_tool_param import AssistantToolParam
+from openai.types.beta.thread_create_params import (
+    Message as OpenAICreateThreadParamsMessage,
+)
+from openai.types.beta.threads.message import Message as OpenAIMessage
+from openai.types.beta.threads.message_content import MessageContent
+from openai.types.beta.threads.run import Run
+from pydantic import BaseModel
+from typing_extensions import Dict, Required, override
 
 FileContent = Union[IO[bytes], bytes, PathLike]
@@ -304,6 +309,7 @@ class ChatCompletionToolCallChunk(TypedDict):
     id: Optional[str]
     type: Literal["function"]
    function: ChatCompletionToolCallFunctionChunk
+    index: int
 
 
 class ChatCompletionDeltaToolCallChunk(TypedDict):