forked from phoenix/litellm-mirror
fix(proxy_server.py): support langfuse logging for rejected requests on /v1/chat/completions
parent 017af34866
commit 7618ec43b3

7 changed files with 74 additions and 33 deletions
@@ -447,6 +447,7 @@ class OpenTelemetry(CustomLogger):
             # cast sr -> dict
             import json

+            try:
                 _raw_response = json.loads(_raw_response)
                 for param, val in _raw_response.items():
                     if not isinstance(val, str):
@@ -455,6 +456,16 @@ class OpenTelemetry(CustomLogger):
                         f"llm.{custom_llm_provider}.{param}",
                         val,
                     )
+            except json.JSONDecodeError:
+                verbose_logger.debug(
+                    "litellm.integrations.opentelemetry.py::set_raw_request_attributes() - raw_response not json string - {}".format(
+                        _raw_response
+                    )
+                )
+                span.set_attribute(
+                    f"llm.{custom_llm_provider}.stringified_raw_response",
+                    _raw_response,
+                )
         pass
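The hunk above guards `json.loads` so that a provider raw response which is not valid JSON no longer raises out of the span-attribute code; it is instead logged at debug level and attached as a single stringified attribute. Below is a minimal sketch of the same guard pattern, using a generic `set_attr` callable as a stand-in for the real span API (the function and parameter names here are illustrative, not the repo's):

```python
import json
from typing import Callable


def set_raw_response_attributes(
    set_attr: Callable[[str, str], None], provider: str, raw_response: str
) -> None:
    # Sketch of the guard added in the diff above, not the repo code.
    try:
        parsed = json.loads(raw_response)
        for param, val in parsed.items():
            if not isinstance(val, str):
                val = str(val)
            set_attr(f"llm.{provider}.{param}", val)
    except json.JSONDecodeError:
        # Raw response was not a JSON string; keep it as one stringified attribute.
        set_attr(f"llm.{provider}.stringified_raw_response", raw_response)


if __name__ == "__main__":
    attrs = {}
    set_raw_response_attributes(attrs.__setitem__, "bedrock", "<html>rate limited</html>")
    print(attrs)  # {'llm.bedrock.stringified_raw_response': '<html>rate limited</html>'}
```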
@@ -1394,7 +1394,7 @@ class BedrockConverseLLM(BaseLLM):
         content_str = ""
         tools: List[ChatCompletionToolCallChunk] = []
         if message is not None:
-            for content in message["content"]:
+            for idx, content in enumerate(message["content"]):
                 """
                 - Content is either a tool response or text
                 """
@@ -1409,6 +1409,7 @@ class BedrockConverseLLM(BaseLLM):
                         id=content["toolUse"]["toolUseId"],
                         type="function",
                         function=_function_chunk,
+                        index=idx,
                     )
                     tools.append(_tool_response_chunk)
         chat_completion_message["content"] = content_str
@@ -2001,6 +2002,7 @@ class AWSEventStreamDecoder:
                     "name": start_obj["toolUse"]["name"],
                     "arguments": "",
                 },
+                "index": index,
             }
         elif "delta" in chunk_data:
             delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
@@ -2014,6 +2016,7 @@ class AWSEventStreamDecoder:
                     "name": None,
                     "arguments": delta_obj["toolUse"]["input"],
                 },
+                "index": index,
            }
        elif "stopReason" in chunk_data:
            finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
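These hunks thread a positional `index` into every tool-call chunk that Bedrock emits. The index is what lets a client reassemble parallel tool calls from a stream: argument fragments carrying the same index belong to the same call. A small sketch of that reassembly, with assumed chunk shapes rather than the repo's exact TypedDicts:

```python
from typing import Dict, List, Optional, TypedDict


class ToolCallChunk(TypedDict):
    # Assumed shape for illustration; mirrors the idea behind ChatCompletionToolCallChunk.
    id: Optional[str]
    name: Optional[str]
    arguments: str
    index: int


def merge_tool_call_chunks(chunks: List[ToolCallChunk]) -> Dict[int, str]:
    """Concatenate streamed argument fragments per tool-call index."""
    merged: Dict[int, str] = {}
    for chunk in chunks:
        merged[chunk["index"]] = merged.get(chunk["index"], "") + chunk["arguments"]
    return merged


print(merge_tool_call_chunks([
    {"id": "call_0", "name": "get_weather", "arguments": '{"city":', "index": 0},
    {"id": None, "name": None, "arguments": ' "Paris"}', "index": 0},
]))
# -> {0: '{"city": "Paris"}'}
```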
@@ -687,6 +687,7 @@ class VertexLLM(BaseLLM):
                         id=f"call_{str(uuid.uuid4())}",
                         type="function",
                         function=_function_chunk,
+                        index=candidate.get("index", idx),
                     )
                     tools.append(_tool_response_chunk)
@@ -1,12 +1,12 @@
 model_list:
-  - model_name: claude-3-5-sonnet # all requests where model not in your config go to this deployment
+  - model_name: "*"
     litellm_params:
       model: "openai/*"
       mock_response: "Hello world!"

 litellm_settings:
-  callbacks: ["otel"]
-  cache: True
+  success_callback: ["langfuse"]
+  failure_callback: ["langfuse"]

 general_settings:
   alerting: ["slack"]
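The example config swaps the `otel` callback and cache flag for Langfuse callbacks on both the success and failure paths, which is what makes rejected requests visible in Langfuse. The same callbacks can be set from Python when using litellm as a library (a short sketch; Langfuse credentials are read from the usual `LANGFUSE_*` environment variables):

```python
import litellm

# Mirror of the proxy config above when calling litellm directly:
litellm.success_callback = ["langfuse"]  # log successful completions to Langfuse
litellm.failure_callback = ["langfuse"]  # log failed/rejected completions to Langfuse
```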
@@ -2753,12 +2753,8 @@ async def chat_completion(
         if isinstance(data["model"], str) and data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]

-        ### CALL HOOKS ### - modify/reject incoming data before calling the model
-        data = await proxy_logging_obj.pre_call_hook( # type: ignore
-            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
-        )
-
         ## LOGGING OBJECT ## - initialize logging object for logging success/failure events for call
+        ## IMPORTANT Note: - initialize this before running pre-call checks. Ensures we log rejected requests to langfuse.
         data["litellm_call_id"] = str(uuid.uuid4())
         logging_obj, data = litellm.utils.function_setup(
             original_function="acompletion",
@@ -2769,6 +2765,11 @@ async def chat_completion(

         data["litellm_logging_obj"] = logging_obj

+        ### CALL HOOKS ### - modify/reject incoming data before calling the model
+        data = await proxy_logging_obj.pre_call_hook( # type: ignore
+            user_api_key_dict=user_api_key_dict, data=data, call_type="completion"
+        )
+
         tasks = []
         tasks.append(
             proxy_logging_obj.during_call_hook(
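This reordering is the core of the fix: `function_setup` now runs before `pre_call_hook`, so when the hook rejects a request there is already a logging object to record the failure against. A simplified sketch of that ordering, where `pre_call_hook` and `failure_handler` are hypothetical stand-ins rather than the proxy's exact signatures:

```python
import uuid


async def handle_chat_completion(data: dict, pre_call_hook, failure_handler) -> dict:
    # 1. Initialize the logging object first, so a rejection can still be logged.
    data["litellm_call_id"] = str(uuid.uuid4())
    logging_obj = {"call_id": data["litellm_call_id"]}  # stand-in for litellm's Logging object
    data["litellm_logging_obj"] = logging_obj

    # 2. Only then run the pre-call hook, which may reject the request.
    try:
        data = await pre_call_hook(data)
    except Exception as exc:
        # The rejected request is logged (e.g. to Langfuse) before re-raising.
        await failure_handler(logging_obj, exc)
        raise
    return data
```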
@@ -31,6 +31,7 @@ from litellm.caching import DualCache, RedisCache
 from litellm.exceptions import RejectedRequestError
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.integrations.slack_alerting import SlackAlerting
+from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy._types import (
     AlertType,
@@ -595,6 +596,23 @@ class ProxyLogging:
                 )
             )

+        ### LOGGING ###
+        litellm_logging_obj: Optional[Logging] = request_data.get(
+            "litellm_logging_obj", None
+        )
+
+        if (
+            isinstance(original_exception, HTTPException)
+            and litellm_logging_obj is not None
+        ):
+            # log the custom exception
+            await litellm_logging_obj.async_failure_handler(
+                exception=original_exception,
+                traceback_exception=traceback.format_exc(),
+                start_time=time.time(),
+                end_time=time.time(),
+            )
+
         for callback in litellm.callbacks:
             try:
                 _callback: Optional[CustomLogger] = None
@@ -611,6 +629,7 @@ class ProxyLogging:
                     )
             except Exception as e:
                 raise e
+
         return

     async def post_call_success_hook(
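On the failure path, `ProxyLogging` now pulls the logging object back out of `request_data` and, for HTTP-level rejections, invokes its async failure handler so the configured `failure_callback` (Langfuse here) receives the event. A compact sketch of that dispatch, assuming a `Logging`-like object exposing an `async_failure_handler` coroutine (the helper name below is illustrative):

```python
import time
import traceback
from typing import Any, Optional


async def log_rejected_request(request_data: dict, original_exception: Exception) -> None:
    # "litellm_logging_obj" is the key set in chat_completion before the pre-call hook runs.
    litellm_logging_obj: Optional[Any] = request_data.get("litellm_logging_obj", None)
    if litellm_logging_obj is None:
        return  # nothing to log against (the request never reached function_setup)
    now = time.time()
    await litellm_logging_obj.async_failure_handler(
        exception=original_exception,
        traceback_exception=traceback.format_exc(),
        start_time=now,
        end_time=now,
    )
```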
@@ -1,32 +1,37 @@
+from os import PathLike
 from typing import (
-    Optional,
-    Union,
+    IO,
     Any,
     BinaryIO,
-    Literal,
     Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Tuple,
+    TypedDict,
+    Union,
 )
-from typing_extensions import override, Required, Dict
-from pydantic import BaseModel
-from openai.types.beta.threads.message_content import MessageContent
-from openai.types.beta.threads.message import Message as OpenAIMessage
-from openai.types.beta.thread_create_params import (
-    Message as OpenAICreateThreadParamsMessage,
-)
+
+from openai._legacy_response import HttpxBinaryResponseContent
 from openai.lib.streaming._assistants import (
     AssistantEventHandler,
     AssistantStreamManager,
-    AsyncAssistantStreamManager,
     AsyncAssistantEventHandler,
+    AsyncAssistantStreamManager,
 )
-from openai.types.beta.assistant_tool_param import AssistantToolParam
-from openai.types.beta.threads.run import Run
+from openai.pagination import AsyncCursorPage, SyncCursorPage
+from openai.types import Batch, FileObject
 from openai.types.beta.assistant import Assistant
-from openai.pagination import SyncCursorPage, AsyncCursorPage
-from os import PathLike
-from openai.types import FileObject, Batch
-from openai._legacy_response import HttpxBinaryResponseContent
-from typing import TypedDict, List, Optional, Tuple, Mapping, IO
+from openai.types.beta.assistant_tool_param import AssistantToolParam
+from openai.types.beta.thread_create_params import (
+    Message as OpenAICreateThreadParamsMessage,
+)
+from openai.types.beta.threads.message import Message as OpenAIMessage
+from openai.types.beta.threads.message_content import MessageContent
+from openai.types.beta.threads.run import Run
+from pydantic import BaseModel
+from typing_extensions import Dict, Required, override

 FileContent = Union[IO[bytes], bytes, PathLike]
@@ -304,6 +309,7 @@ class ChatCompletionToolCallChunk(TypedDict):
     id: Optional[str]
     type: Literal["function"]
     function: ChatCompletionToolCallFunctionChunk
+    index: int


 class ChatCompletionDeltaToolCallChunk(TypedDict):