Support 'file' message type for VLLM video url's + Anthropic redacted message thinking support (#10129)

* feat(hosted_vllm/chat/transformation.py): support calling vllm video url with openai 'file' message type allows switching between gemini/vllm easily * [WIP] redacted thinking tests (#9044) * WIP: redacted thinking tests * test: add test for redacted thinking in assistant message --------- Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com> * fix(anthropic/chat/transformation.py): support redacted thinking block on anthropic completion Fixes https://github.com/BerriAI/litellm/issues/9058 * fix(anthropic/chat/handler.py): transform anthropic redacted messages on streaming Fixes https://github.com/BerriAI/litellm/issues/9058 * fix(bedrock/): support redacted text on streaming + non-streaming Fixes https://github.com/BerriAI/litellm/issues/9058 * feat(litellm_proxy/chat/transformation.py): support 'reasoning_effort' param for proxy allows using reasoning effort with thinking models on proxy * test: update tests * fix(utils.py): fix linting error * fix: fix linting errors * fix: fix linting errors * fix: fix linting error * fix: fix linting errors * fix(anthropic/chat/transformation.py): fix returning citations in chat completion --------- Co-authored-by: Johann Miller <22018973+johannkm@users.noreply.github.com>
2025-04-25 18:54:30 +00:00 · 2025-04-19 11:16:37 -07:00 · 2025-04-19 11:16:37 -07:00 · f08a4e3c06
commit f08a4e3c06
parent 3c463f6715
20 changed files with 638 additions and 109 deletions
--- a/litellm/llms/bedrock/chat/invoke_handler.py
+++ b/litellm/llms/bedrock/chat/invoke_handler.py
@ -50,6 +50,7 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
+    ChatCompletionRedactedThinkingBlock,
    ChatCompletionThinkingBlock,
    ChatCompletionToolCallChunk,
    ChatCompletionToolCallFunctionChunk,
@ -1255,19 +1256,33 @@ class AWSEventStreamDecoder:

    def translate_thinking_blocks(
        self, thinking_block: BedrockConverseReasoningContentBlockDelta
-    ) -> Optional[List[ChatCompletionThinkingBlock]]:
+    ) -> Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]:
        """
        Translate the thinking blocks to a string
        """

-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
-        _thinking_block = ChatCompletionThinkingBlock(type="thinking")
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
+        _thinking_block: Optional[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = None
+
        if "text" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
            _thinking_block["thinking"] = thinking_block["text"]
        elif "signature" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
            _thinking_block["signature"] = thinking_block["signature"]
            _thinking_block["thinking"] = ""  # consistent with anthropic response
-        thinking_blocks_list.append(_thinking_block)
+        elif "redactedContent" in thinking_block:
+            _thinking_block = ChatCompletionRedactedThinkingBlock(
+                type="redacted_thinking", data=thinking_block["redactedContent"]
+            )
+        if _thinking_block is not None:
+            thinking_blocks_list.append(_thinking_block)
        return thinking_blocks_list

    def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
@ -1279,31 +1294,44 @@ class AWSEventStreamDecoder:
            usage: Optional[Usage] = None
            provider_specific_fields: dict = {}
            reasoning_content: Optional[str] = None
-            thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+            thinking_blocks: Optional[
+                List[
+                    Union[
+                        ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                    ]
+                ]
+            ] = None

            index = int(chunk_data.get("contentBlockIndex", 0))
            if "start" in chunk_data:
                start_obj = ContentBlockStartEvent(**chunk_data["start"])
                self.content_blocks = []  # reset
-                if (
-                    start_obj is not None
-                    and "toolUse" in start_obj
-                    and start_obj["toolUse"] is not None
-                ):
-                    ## check tool name was formatted by litellm
-                    _response_tool_name = start_obj["toolUse"]["name"]
-                    response_tool_name = get_bedrock_tool_name(
-                        response_tool_name=_response_tool_name
-                    )
-                    tool_use = {
-                        "id": start_obj["toolUse"]["toolUseId"],
-                        "type": "function",
-                        "function": {
-                            "name": response_tool_name,
-                            "arguments": "",
-                        },
-                        "index": index,
-                    }
+                if start_obj is not None:
+                    if "toolUse" in start_obj and start_obj["toolUse"] is not None:
+                        ## check tool name was formatted by litellm
+                        _response_tool_name = start_obj["toolUse"]["name"]
+                        response_tool_name = get_bedrock_tool_name(
+                            response_tool_name=_response_tool_name
+                        )
+                        tool_use = {
+                            "id": start_obj["toolUse"]["toolUseId"],
+                            "type": "function",
+                            "function": {
+                                "name": response_tool_name,
+                                "arguments": "",
+                            },
+                            "index": index,
+                        }
+                    elif (
+                        "reasoningContent" in start_obj
+                        and start_obj["reasoningContent"] is not None
+                    ):  # redacted thinking can be in start object
+                        thinking_blocks = self.translate_thinking_blocks(
+                            start_obj["reasoningContent"]
+                        )
+                        provider_specific_fields = {
+                            "reasoningContent": start_obj["reasoningContent"],
+                        }
            elif "delta" in chunk_data:
                delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
                self.content_blocks.append(delta_obj)