Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 10:44:24 +00:00
Support 'file' message type for VLLM video URLs + Anthropic redacted thinking message support (#10129)
* feat(hosted_vllm/chat/transformation.py): support calling vllm video url with openai 'file' message type

  allows switching between gemini/vllm easily

* [WIP] redacted thinking tests (#9044)

  * WIP: redacted thinking tests

  * test: add test for redacted thinking in assistant message

  ---------

  Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com>

* fix(anthropic/chat/transformation.py): support redacted thinking block on anthropic completion

  Fixes https://github.com/BerriAI/litellm/issues/9058

* fix(anthropic/chat/handler.py): transform anthropic redacted messages on streaming

  Fixes https://github.com/BerriAI/litellm/issues/9058

* fix(bedrock/): support redacted text on streaming + non-streaming

  Fixes https://github.com/BerriAI/litellm/issues/9058

* feat(litellm_proxy/chat/transformation.py): support 'reasoning_effort' param for proxy

  allows using reasoning effort with thinking models on proxy

* test: update tests

* fix(utils.py): fix linting error

* fix: fix linting errors

* fix: fix linting errors

* fix: fix linting error

* fix: fix linting errors

* fix(anthropic/chat/transformation.py): fix returning citations in chat completion

---------

Co-authored-by: Johann Miller <22018973+johannkm@users.noreply.github.com>
This commit is contained in:
parent 6f5629cf64
commit 72cf30c081
20 changed files with 638 additions and 109 deletions
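As context for the headline change, here is a minimal usage sketch of what the new 'file' message type enables for hosted vLLM video inputs. It is a hedged example, not code from this commit: the model name is taken from the new hosted_vllm test further down, and the api_base is a placeholder for your own vLLM endpoint.

    import litellm

    # Placeholder endpoint; point api_base at your own vLLM server.
    response = litellm.completion(
        model="hosted_vllm/llama-3.1-70b-instruct",
        api_base="http://localhost:8000/v1",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Summarize this clip."},
                    # Rewritten by HostedVLLMChatConfig._transform_messages into a
                    # {"type": "video_url", ...} content part before the request is sent.
                    {"type": "file", "file": {"file_id": "https://example.com/video.mp4"}},
                ],
            }
        ],
    )
    print(response.choices[0].message.content)

The same message shape also works for Gemini, which is what the commit message means by switching between gemini/vllm easily.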
@@ -14,6 +14,7 @@ from litellm.types.llms.openai import ChatCompletionThinkingBlock
 from litellm.types.utils import (
     ChatCompletionDeltaToolCall,
     ChatCompletionMessageToolCall,
+    ChatCompletionRedactedThinkingBlock,
     Choices,
     Delta,
     EmbeddingResponse,

@@ -486,7 +487,14 @@ def convert_to_model_response_object(  # noqa: PLR0915
                 )

             # Handle thinking models that display `thinking_blocks` within `content`
-            thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+            thinking_blocks: Optional[
+                List[
+                    Union[
+                        ChatCompletionThinkingBlock,
+                        ChatCompletionRedactedThinkingBlock,
+                    ]
+                ]
+            ] = None
             if "thinking_blocks" in choice["message"]:
                 thinking_blocks = choice["message"]["thinking_blocks"]
                 provider_specific_fields["thinking_blocks"] = thinking_blocks
@@ -471,3 +471,59 @@ def unpack_defs(schema, defs):
             unpack_defs(ref, defs)
             value["items"] = ref
             continue
+
+
+def _get_image_mime_type_from_url(url: str) -> Optional[str]:
+    """
+    Get mime type for common image URLs
+    See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
+
+    Supported by Gemini:
+    application/pdf
+    audio/mpeg
+    audio/mp3
+    audio/wav
+    image/png
+    image/jpeg
+    image/webp
+    text/plain
+    video/mov
+    video/mpeg
+    video/mp4
+    video/mpg
+    video/avi
+    video/wmv
+    video/mpegps
+    video/flv
+    """
+    url = url.lower()
+
+    # Map file extensions to mime types
+    mime_types = {
+        # Images
+        (".jpg", ".jpeg"): "image/jpeg",
+        (".png",): "image/png",
+        (".webp",): "image/webp",
+        # Videos
+        (".mp4",): "video/mp4",
+        (".mov",): "video/mov",
+        (".mpeg", ".mpg"): "video/mpeg",
+        (".avi",): "video/avi",
+        (".wmv",): "video/wmv",
+        (".mpegps",): "video/mpegps",
+        (".flv",): "video/flv",
+        # Audio
+        (".mp3",): "audio/mp3",
+        (".wav",): "audio/wav",
+        (".mpeg",): "audio/mpeg",
+        # Documents
+        (".pdf",): "application/pdf",
+        (".txt",): "text/plain",
+    }
+
+    # Check each extension group against the URL
+    for extensions, mime_type in mime_types.items():
+        if any(url.endswith(ext) for ext in extensions):
+            return mime_type
+
+    return None
@@ -2258,6 +2258,14 @@ def _parse_content_type(content_type: str) -> str:
     return m.get_content_type()


+def _parse_mime_type(base64_data: str) -> Optional[str]:
+    mime_type_match = re.match(r"data:(.*?);base64", base64_data)
+    if mime_type_match:
+        return mime_type_match.group(1)
+    else:
+        return None
+
+
 class BedrockImageProcessor:
     """Handles both sync and async image processing for Bedrock conversations."""

@@ -29,6 +29,7 @@ from litellm.types.llms.anthropic import (
     UsageDelta,
 )
 from litellm.types.llms.openai import (
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
 )

@@ -501,18 +502,19 @@ class ModelResponseIterator:
     ) -> Tuple[
         str,
         Optional[ChatCompletionToolCallChunk],
-        List[ChatCompletionThinkingBlock],
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]],
         Dict[str, Any],
     ]:
         """
         Helper function to handle the content block delta
         """

         text = ""
         tool_use: Optional[ChatCompletionToolCallChunk] = None
         provider_specific_fields = {}
         content_block = ContentBlockDelta(**chunk)  # type: ignore
-        thinking_blocks: List[ChatCompletionThinkingBlock] = []
+        thinking_blocks: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []

         self.content_blocks.append(content_block)
         if "text" in content_block["delta"]:

@@ -541,20 +543,25 @@ class ModelResponseIterator:
                 )
             ]
             provider_specific_fields["thinking_blocks"] = thinking_blocks

         return text, tool_use, thinking_blocks, provider_specific_fields

     def _handle_reasoning_content(
-        self, thinking_blocks: List[ChatCompletionThinkingBlock]
+        self,
+        thinking_blocks: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ],
     ) -> Optional[str]:
         """
         Handle the reasoning content
         """
         reasoning_content = None
         for block in thinking_blocks:
+            thinking_content = cast(Optional[str], block.get("thinking"))
             if reasoning_content is None:
                 reasoning_content = ""
-            if "thinking" in block:
-                reasoning_content += block["thinking"]
+            if thinking_content is not None:
+                reasoning_content += thinking_content
         return reasoning_content

     def chunk_parser(self, chunk: dict) -> ModelResponseStream:

@@ -567,7 +574,13 @@ class ModelResponseIterator:
         usage: Optional[Usage] = None
         provider_specific_fields: Dict[str, Any] = {}
         reasoning_content: Optional[str] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None

         index = int(chunk.get("index", 0))
         if type_chunk == "content_block_delta":

@@ -605,6 +618,15 @@ class ModelResponseIterator:
                     },
                     "index": self.tool_index,
                 }
+            elif (
+                content_block_start["content_block"]["type"] == "redacted_thinking"
+            ):
+                thinking_blocks = [
+                    ChatCompletionRedactedThinkingBlock(
+                        type="redacted_thinking",
+                        data=content_block_start["content_block"]["data"],
+                    )
+                ]
         elif type_chunk == "content_block_stop":
             ContentBlockStop(**chunk)  # type: ignore
             # check if tool call content block
@@ -30,6 +30,7 @@ from litellm.types.llms.openai import (
     REASONING_EFFORT,
     AllMessageValues,
     ChatCompletionCachedContent,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionSystemMessage,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,

@@ -575,13 +576,21 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
     ) -> Tuple[
         str,
         Optional[List[Any]],
-        Optional[List[ChatCompletionThinkingBlock]],
+        Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ],
         Optional[str],
         List[ChatCompletionToolCallChunk],
     ]:
         text_content = ""
         citations: Optional[List[Any]] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None
         reasoning_content: Optional[str] = None
         tool_calls: List[ChatCompletionToolCallChunk] = []
         for idx, content in enumerate(completion_response["content"]):

@@ -600,20 +609,30 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
                         index=idx,
                     )
                 )
-            ## CITATIONS
-            if content.get("citations", None) is not None:
-                if citations is None:
-                    citations = []
-                citations.append(content["citations"])
-            if content.get("thinking", None) is not None:
+            elif content.get("thinking", None) is not None:
                 if thinking_blocks is None:
                     thinking_blocks = []
                 thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
+            elif content["type"] == "redacted_thinking":
+                if thinking_blocks is None:
+                    thinking_blocks = []
+                thinking_blocks.append(
+                    cast(ChatCompletionRedactedThinkingBlock, content)
+                )
+
+            ## CITATIONS
+            if content.get("citations") is not None:
+                if citations is None:
+                    citations = []
+                citations.append(content["citations"])
         if thinking_blocks is not None:
             reasoning_content = ""
             for block in thinking_blocks:
-                if "thinking" in block:
-                    reasoning_content += block["thinking"]
+                thinking_content = cast(Optional[str], block.get("thinking"))
+                if thinking_content is not None:
+                    reasoning_content += thinking_content
+
         return text_content, citations, thinking_blocks, reasoning_content, tool_calls

     def calculate_usage(

@@ -703,7 +722,13 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
         else:
             text_content = ""
             citations: Optional[List[Any]] = None
-            thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+            thinking_blocks: Optional[
+                List[
+                    Union[
+                        ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                    ]
+                ]
+            ] = None
             reasoning_content: Optional[str] = None
             tool_calls: List[ChatCompletionToolCallChunk] = []

@@ -22,6 +22,7 @@ from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMExcepti
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
     AllMessageValues,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionResponseMessage,
     ChatCompletionSystemMessage,
     ChatCompletionThinkingBlock,

@@ -627,9 +628,11 @@ class AmazonConverseConfig(BaseConfig):

     def _transform_thinking_blocks(
         self, thinking_blocks: List[BedrockConverseReasoningContentBlock]
-    ) -> List[ChatCompletionThinkingBlock]:
+    ) -> List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]:
         """Return a consistent format for thinking blocks between Anthropic and Bedrock."""
-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
         for block in thinking_blocks:
             if "reasoningText" in block:
                 _thinking_block = ChatCompletionThinkingBlock(type="thinking")

@@ -640,6 +643,11 @@ class AmazonConverseConfig(BaseConfig):
                 if _signature is not None:
                     _thinking_block["signature"] = _signature
                 thinking_blocks_list.append(_thinking_block)
+            elif "redactedContent" in block:
+                _redacted_block = ChatCompletionRedactedThinkingBlock(
+                    type="redacted_thinking", data=block["redactedContent"]
+                )
+                thinking_blocks_list.append(_redacted_block)
         return thinking_blocks_list

     def _transform_usage(self, usage: ConverseTokenUsageBlock) -> Usage:
@@ -50,6 +50,7 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
     ChatCompletionToolCallFunctionChunk,

@@ -1255,18 +1256,32 @@ class AWSEventStreamDecoder:

     def translate_thinking_blocks(
         self, thinking_block: BedrockConverseReasoningContentBlockDelta
-    ) -> Optional[List[ChatCompletionThinkingBlock]]:
+    ) -> Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]:
         """
         Translate the thinking blocks to a string
         """

-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
-        _thinking_block = ChatCompletionThinkingBlock(type="thinking")
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
+        _thinking_block: Optional[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = None
+
         if "text" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["thinking"] = thinking_block["text"]
         elif "signature" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["signature"] = thinking_block["signature"]
             _thinking_block["thinking"] = ""  # consistent with anthropic response
-        thinking_blocks_list.append(_thinking_block)
+        elif "redactedContent" in thinking_block:
+            _thinking_block = ChatCompletionRedactedThinkingBlock(
+                type="redacted_thinking", data=thinking_block["redactedContent"]
+            )
+        if _thinking_block is not None:
+            thinking_blocks_list.append(_thinking_block)
         return thinking_blocks_list

@@ -1279,17 +1294,20 @@ class AWSEventStreamDecoder:
         usage: Optional[Usage] = None
         provider_specific_fields: dict = {}
         reasoning_content: Optional[str] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None

         index = int(chunk_data.get("contentBlockIndex", 0))
         if "start" in chunk_data:
             start_obj = ContentBlockStartEvent(**chunk_data["start"])
             self.content_blocks = []  # reset
-            if (
-                start_obj is not None
-                and "toolUse" in start_obj
-                and start_obj["toolUse"] is not None
-            ):
+            if start_obj is not None:
+                if "toolUse" in start_obj and start_obj["toolUse"] is not None:
                     ## check tool name was formatted by litellm
                     _response_tool_name = start_obj["toolUse"]["name"]
                     response_tool_name = get_bedrock_tool_name(

@@ -1304,6 +1322,16 @@ class AWSEventStreamDecoder:
                         },
                         "index": index,
                     }
+                elif (
+                    "reasoningContent" in start_obj
+                    and start_obj["reasoningContent"] is not None
+                ):  # redacted thinking can be in start object
+                    thinking_blocks = self.translate_thinking_blocks(
+                        start_obj["reasoningContent"]
+                    )
+                    provider_specific_fields = {
+                        "reasoningContent": start_obj["reasoningContent"],
+                    }
         elif "delta" in chunk_data:
             delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
             self.content_blocks.append(delta_obj)
@@ -37,6 +37,7 @@ from litellm.types.llms.databricks import (
 )
 from litellm.types.llms.openai import (
     AllMessageValues,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolChoiceFunctionParam,
     ChatCompletionToolChoiceObjectParam,

@@ -314,13 +315,24 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig):
     @staticmethod
     def extract_reasoning_content(
         content: Optional[AllDatabricksContentValues],
-    ) -> Tuple[Optional[str], Optional[List[ChatCompletionThinkingBlock]]]:
+    ) -> Tuple[
+        Optional[str],
+        Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ],
+    ]:
         """
         Extract and return the reasoning content and thinking blocks
         """
         if content is None:
             return None, None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None
         reasoning_content: Optional[str] = None
         if isinstance(content, list):
             for item in content:
@@ -2,9 +2,19 @@
 Translate from OpenAI's `/v1/chat/completions` to VLLM's `/v1/chat/completions`
 """

-from typing import Optional, Tuple
+from typing import List, Optional, Tuple, cast

+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    _get_image_mime_type_from_url,
+)
+from litellm.litellm_core_utils.prompt_templates.factory import _parse_mime_type
 from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionFileObject,
+    ChatCompletionVideoObject,
+    ChatCompletionVideoUrlObject,
+)

 from ....utils import _remove_additional_properties, _remove_strict_from_schema
 from ...openai.chat.gpt_transformation import OpenAIGPTConfig

@@ -38,3 +48,71 @@ class HostedVLLMChatConfig(OpenAIGPTConfig):
             api_key or get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key"
         )  # vllm does not require an api key
         return api_base, dynamic_api_key
+
+    def _is_video_file(self, content_item: ChatCompletionFileObject) -> bool:
+        """
+        Check if the file is a video
+
+        - format: video/<extension>
+        - file_data: base64 encoded video data
+        - file_id: infer mp4 from extension
+        """
+        file = content_item.get("file", {})
+        format = file.get("format")
+        file_data = file.get("file_data")
+        file_id = file.get("file_id")
+        if content_item.get("type") != "file":
+            return False
+        if format and format.startswith("video/"):
+            return True
+        elif file_data:
+            mime_type = _parse_mime_type(file_data)
+            if mime_type and mime_type.startswith("video/"):
+                return True
+        elif file_id:
+            mime_type = _get_image_mime_type_from_url(file_id)
+            if mime_type and mime_type.startswith("video/"):
+                return True
+        return False
+
+    def _convert_file_to_video_url(
+        self, content_item: ChatCompletionFileObject
+    ) -> ChatCompletionVideoObject:
+        file = content_item.get("file", {})
+        file_id = file.get("file_id")
+        file_data = file.get("file_data")
+
+        if file_id:
+            return ChatCompletionVideoObject(
+                type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_id)
+            )
+        elif file_data:
+            return ChatCompletionVideoObject(
+                type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_data)
+            )
+        raise ValueError("file_id or file_data is required")
+
+    def _transform_messages(
+        self, messages: List[AllMessageValues], model: str
+    ) -> List[AllMessageValues]:
+        """
+        Support translating video files from file_id or file_data to video_url
+        """
+        for message in messages:
+            if message["role"] == "user":
+                message_content = message.get("content")
+                if message_content and isinstance(message_content, list):
+                    replaced_content_items: List[
+                        Tuple[int, ChatCompletionFileObject]
+                    ] = []
+                    for idx, content_item in enumerate(message_content):
+                        if content_item.get("type") == "file":
+                            content_item = cast(ChatCompletionFileObject, content_item)
+                            if self._is_video_file(content_item):
+                                replaced_content_items.append((idx, content_item))
+                    for idx, content_item in replaced_content_items:
+                        message_content[idx] = self._convert_file_to_video_url(
+                            content_item
+                        )
+        transformed_messages = super()._transform_messages(messages, model)
+        return transformed_messages
@@ -13,6 +13,7 @@ class LiteLLMProxyChatConfig(OpenAIGPTConfig):
     def get_supported_openai_params(self, model: str) -> List:
         list = super().get_supported_openai_params(model)
         list.append("thinking")
+        list.append("reasoning_effort")
         return list

     def _map_openai_params(
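The hunk above adds reasoning_effort to the OpenAI params the litellm_proxy provider treats as supported, so the value can be forwarded to thinking models sitting behind a LiteLLM proxy. A hedged sketch of the call this permits; the proxy URL, key, and model alias are placeholders, not values from this commit:

    import litellm

    response = litellm.completion(
        model="litellm_proxy/my-thinking-model",  # placeholder alias configured on the proxy
        api_base="http://localhost:4000",         # placeholder proxy URL
        api_key="sk-1234",                        # placeholder proxy key
        reasoning_effort="low",                   # now listed as a supported param for litellm_proxy
        messages=[{"role": "user", "content": "What is 17 * 24?"}],
    )
    # reasoning_content / thinking_blocks may be populated when the underlying model emits thinking.
    print(response.choices[0].message.reasoning_content)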
@@ -12,6 +12,9 @@ from pydantic import BaseModel

 import litellm
 from litellm._logging import verbose_logger
+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    _get_image_mime_type_from_url,
+)
 from litellm.litellm_core_utils.prompt_templates.factory import (
     convert_to_anthropic_image_obj,
     convert_to_gemini_tool_call_invoke,

@@ -99,62 +102,6 @@ def _process_gemini_image(image_url: str, format: Optional[str] = None) -> PartT
         raise e


-def _get_image_mime_type_from_url(url: str) -> Optional[str]:
-    """
-    Get mime type for common image URLs
-    See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
-
-    Supported by Gemini:
-    application/pdf
-    audio/mpeg
-    audio/mp3
-    audio/wav
-    image/png
-    image/jpeg
-    image/webp
-    text/plain
-    video/mov
-    video/mpeg
-    video/mp4
-    video/mpg
-    video/avi
-    video/wmv
-    video/mpegps
-    video/flv
-    """
-    url = url.lower()
-
-    # Map file extensions to mime types
-    mime_types = {
-        # Images
-        (".jpg", ".jpeg"): "image/jpeg",
-        (".png",): "image/png",
-        (".webp",): "image/webp",
-        # Videos
-        (".mp4",): "video/mp4",
-        (".mov",): "video/mov",
-        (".mpeg", ".mpg"): "video/mpeg",
-        (".avi",): "video/avi",
-        (".wmv",): "video/wmv",
-        (".mpegps",): "video/mpegps",
-        (".flv",): "video/flv",
-        # Audio
-        (".mp3",): "audio/mp3",
-        (".wav",): "audio/wav",
-        (".mpeg",): "audio/mpeg",
-        # Documents
-        (".pdf",): "application/pdf",
-        (".txt",): "text/plain",
-    }
-
-    # Check each extension group against the URL
-    for extensions, mime_type in mime_types.items():
-        if any(url.endswith(ext) for ext in extensions):
-            return mime_type
-
-    return None
-
-
 def _gemini_convert_messages_with_history(  # noqa: PLR0915
     messages: List[AllMessageValues],
 ) -> List[ContentType]:
@@ -179,6 +179,7 @@ class ToolUseBlockStartEvent(TypedDict):

 class ContentBlockStartEvent(TypedDict, total=False):
     toolUse: Optional[ToolUseBlockStartEvent]
+    reasoningContent: BedrockConverseReasoningContentBlockDelta


 class ContentBlockDeltaEvent(TypedDict, total=False):

@@ -468,6 +468,12 @@ class ChatCompletionThinkingBlock(TypedDict, total=False):
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


+class ChatCompletionRedactedThinkingBlock(TypedDict, total=False):
+    type: Required[Literal["redacted_thinking"]]
+    data: str
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
 class WebSearchOptionsUserLocationApproximate(TypedDict, total=False):
     city: str
     """Free text input for the city of the user, e.g. `San Francisco`."""

@@ -797,7 +803,9 @@ class ChatCompletionResponseMessage(TypedDict, total=False):
     function_call: Optional[ChatCompletionToolCallFunctionChunk]
     provider_specific_fields: Optional[dict]
     reasoning_content: Optional[str]
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]


 class ChatCompletionUsageBlock(TypedDict):
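With ChatCompletionRedactedThinkingBlock defined above, thinking_blocks on a message or delta can now mix the two shapes. A small illustration of how a consumer might handle both; the values are abbreviated stand-ins, not taken from a real response:

    thinking_blocks = [
        # Regular thinking block: readable reasoning text plus a signature.
        {"type": "thinking", "thinking": "First, compare the two documents...", "signature": "EoQBCk..."},
        # Redacted thinking block: only opaque encrypted data is returned.
        {"type": "redacted_thinking", "data": "EuoBCoYBGAIiQ..."},
    ]

    for block in thinking_blocks:
        if block["type"] == "thinking":
            print(block["thinking"])
        else:
            # No readable content; these blocks are surfaced to the caller as-is.
            print(f"[redacted thinking block, {len(block['data'])} chars of opaque data]")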
@@ -29,6 +29,7 @@ from .guardrails import GuardrailEventHooks
 from .llms.openai import (
     Batch,
     ChatCompletionAnnotation,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
     ChatCompletionUsageBlock,

@@ -552,7 +553,9 @@ class Message(OpenAIObject):
     function_call: Optional[FunctionCall]
     audio: Optional[ChatCompletionAudioResponse] = None
     reasoning_content: Optional[str] = None
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ] = None
     provider_specific_fields: Optional[Dict[str, Any]] = Field(
         default=None, exclude=True
     )

@@ -567,7 +570,11 @@ class Message(OpenAIObject):
         audio: Optional[ChatCompletionAudioResponse] = None,
         provider_specific_fields: Optional[Dict[str, Any]] = None,
         reasoning_content: Optional[str] = None,
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None,
         annotations: Optional[List[ChatCompletionAnnotation]] = None,
         **params,
     ):

@@ -650,7 +657,9 @@ class Message(OpenAIObject):

 class Delta(OpenAIObject):
     reasoning_content: Optional[str] = None
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ] = None
     provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)

     def __init__(

@@ -661,7 +670,11 @@ class Delta(OpenAIObject):
         tool_calls=None,
         audio: Optional[ChatCompletionAudioResponse] = None,
         reasoning_content: Optional[str] = None,
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None,
         annotations: Optional[List[ChatCompletionAnnotation]] = None,
         **params,
     ):
@@ -0,0 +1,38 @@
+import json
+import os
+import sys
+from unittest.mock import MagicMock
+
+import pytest
+from fastapi.testclient import TestClient
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+
+from litellm.llms.anthropic.chat.handler import ModelResponseIterator
+
+
+def test_redacted_thinking_content_block_delta():
+    chunk = {
+        "type": "content_block_start",
+        "index": 58,
+        "content_block": {
+            "type": "redacted_thinking",
+            "data": "EuoBCoYBGAIiQJ/SxkPAgqxhKok29YrpJHRUJ0OT8ahCHKAwyhmRuUhtdmDX9+mn4gDzKNv3fVpQdB01zEPMzNY3QuTCd+1bdtEqQK6JuKHqdndbwpr81oVWb4wxd1GqF/7Jkw74IlQa27oobX+KuRkopr9Dllt/RDe7Se0sI1IkU7tJIAQCoP46OAwSDF51P09q67xhHlQ3ihoM2aOVlkghq/X0w8NlIjBMNvXYNbjhyrOcIg6kPFn2ed/KK7Cm5prYAtXCwkb4Wr5tUSoSHu9T5hKdJRbr6WsqEc7Lle7FULqMLZGkhqXyc3BA",
+        },
+    }
+    model_response_iterator = ModelResponseIterator(
+        streaming_response=MagicMock(), sync_stream=False, json_mode=False
+    )
+    model_response = model_response_iterator.chunk_parser(chunk=chunk)
+    print(f"\n\nmodel_response: {model_response}\n\n")
+    assert model_response.choices[0].delta.thinking_blocks is not None
+    assert len(model_response.choices[0].delta.thinking_blocks) == 1
+    print(
+        f"\n\nmodel_response.choices[0].delta.thinking_blocks[0]: {model_response.choices[0].delta.thinking_blocks[0]}\n\n"
+    )
+    assert (
+        model_response.choices[0].delta.thinking_blocks[0]["type"]
+        == "redacted_thinking"
+    )
@@ -56,3 +56,58 @@ def test_calculate_usage():
     assert usage.prompt_tokens_details.cached_tokens == 0
     assert usage._cache_creation_input_tokens == 12304
     assert usage._cache_read_input_tokens == 0
+
+
+def test_extract_response_content_with_citations():
+    config = AnthropicConfig()
+
+    completion_response = {
+        "id": "msg_01XrAv7gc5tQNDuoADra7vB4",
+        "type": "message",
+        "role": "assistant",
+        "model": "claude-3-5-sonnet-20241022",
+        "content": [
+            {"type": "text", "text": "According to the documents, "},
+            {
+                "citations": [
+                    {
+                        "type": "char_location",
+                        "cited_text": "The grass is green. ",
+                        "document_index": 0,
+                        "document_title": "My Document",
+                        "start_char_index": 0,
+                        "end_char_index": 20,
+                    }
+                ],
+                "type": "text",
+                "text": "the grass is green",
+            },
+            {"type": "text", "text": " and "},
+            {
+                "citations": [
+                    {
+                        "type": "char_location",
+                        "cited_text": "The sky is blue.",
+                        "document_index": 0,
+                        "document_title": "My Document",
+                        "start_char_index": 20,
+                        "end_char_index": 36,
+                    }
+                ],
+                "type": "text",
+                "text": "the sky is blue",
+            },
+            {"type": "text", "text": "."},
+        ],
+        "stop_reason": "end_turn",
+        "stop_sequence": None,
+        "usage": {
+            "input_tokens": 610,
+            "cache_creation_input_tokens": 0,
+            "cache_read_input_tokens": 0,
+            "output_tokens": 51,
+        },
+    }
+
+    _, citations, _, _, _ = config.extract_response_content(completion_response)
+    assert citations is not None

@@ -40,3 +40,22 @@ def test_transform_usage():
     )
     assert openai_usage._cache_creation_input_tokens == usage["cacheWriteInputTokens"]
     assert openai_usage._cache_read_input_tokens == usage["cacheReadInputTokens"]
+
+
+def test_transform_thinking_blocks_with_redacted_content():
+    thinking_blocks = [
+        {
+            "reasoningText": {
+                "text": "This is a test",
+                "signature": "test_signature",
+            }
+        },
+        {
+            "redactedContent": "This is a redacted content",
+        },
+    ]
+    config = AmazonConverseConfig()
+    transformed_thinking_blocks = config._transform_thinking_blocks(thinking_blocks)
+    assert len(transformed_thinking_blocks) == 2
+    assert transformed_thinking_blocks[0]["type"] == "thinking"
+    assert transformed_thinking_blocks[1]["type"] == "redacted_thinking"
tests/litellm/llms/bedrock/chat/test_invoke_handler.py (new file, 22 lines)

@@ -0,0 +1,22 @@
+import json
+import os
+import sys
+
+import pytest
+from fastapi.testclient import TestClient
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+from unittest.mock import MagicMock, patch
+
+from litellm.llms.bedrock.chat.invoke_handler import AWSEventStreamDecoder
+
+
+def test_transform_thinking_blocks_with_redacted_content():
+    thinking_block = {"redactedContent": "This is a redacted content"}
+    decoder = AWSEventStreamDecoder(model="test")
+    transformed_thinking_blocks = decoder.translate_thinking_blocks(thinking_block)
+    assert len(transformed_thinking_blocks) == 1
+    assert transformed_thinking_blocks[0]["type"] == "redacted_thinking"
+    assert transformed_thinking_blocks[0]["data"] == "This is a redacted content"
@@ -0,0 +1,45 @@
+import json
+import os
+import sys
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+
+from litellm.llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig
+
+
+def test_hosted_vllm_chat_transformation_file_url():
+    config = HostedVLLMChatConfig()
+    video_url = "https://example.com/video.mp4"
+    video_data = f"data:video/mp4;base64,{video_url}"
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "file",
+                    "file": {
+                        "file_data": video_data,
+                    },
+                }
+            ],
+        }
+    ]
+    transformed_response = config.transform_request(
+        model="hosted_vllm/llama-3.1-70b-instruct",
+        messages=messages,
+        optional_params={},
+        litellm_params={},
+        headers={},
+    )
+    assert transformed_response["messages"] == [
+        {
+            "role": "user",
+            "content": [{"type": "video_url", "video_url": {"url": video_data}}],
+        }
+    ]
File diff suppressed because one or more lines are too long