Support 'file' message type for vLLM video URLs + Anthropic redacted thinking support (#10129)

* feat(hosted_vllm/chat/transformation.py): support calling vLLM video URLs with the OpenAI 'file' message type

allows switching between Gemini and vLLM easily

* [WIP] redacted thinking tests (#9044)

* WIP: redacted thinking tests

* test: add test for redacted thinking in assistant message

---------

Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com>

* fix(anthropic/chat/transformation.py): support redacted thinking block on anthropic completion

Fixes https://github.com/BerriAI/litellm/issues/9058

* fix(anthropic/chat/handler.py): transform anthropic redacted messages on streaming

Fixes https://github.com/BerriAI/litellm/issues/9058

* fix(bedrock/): support redacted text on streaming + non-streaming

Fixes https://github.com/BerriAI/litellm/issues/9058

* feat(litellm_proxy/chat/transformation.py): support 'reasoning_effort' param for proxy

allows using reasoning_effort with thinking models on the proxy

* test: update tests

* fix(utils.py): fix linting error

* fix: fix linting errors

* fix: fix linting errors

* fix: fix linting error

* fix: fix linting errors

* fix(anthropic/chat/transformation.py): fix returning citations in chat completion

---------

Co-authored-by: Johann Miller <22018973+johannkm@users.noreply.github.com>
Krish Dholakia 2025-04-19 11:16:37 -07:00 committed by GitHub
parent 3c463f6715
commit f08a4e3c06
20 changed files with 638 additions and 109 deletions

@@ -14,6 +14,7 @@ from litellm.types.llms.openai import ChatCompletionThinkingBlock
from litellm.types.utils import (
ChatCompletionDeltaToolCall,
ChatCompletionMessageToolCall,
ChatCompletionRedactedThinkingBlock,
Choices,
Delta,
EmbeddingResponse,
@@ -486,7 +487,14 @@ def convert_to_model_response_object( # noqa: PLR0915
)
# Handle thinking models that display `thinking_blocks` within `content`
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock,
+                    ChatCompletionRedactedThinkingBlock,
+                ]
+            ]
+        ] = None
if "thinking_blocks" in choice["message"]:
thinking_blocks = choice["message"]["thinking_blocks"]
provider_specific_fields["thinking_blocks"] = thinking_blocks

@@ -471,3 +471,59 @@ def unpack_defs(schema, defs)
unpack_defs(ref, defs)
value["items"] = ref
continue
def _get_image_mime_type_from_url(url: str) -> Optional[str]:
"""
Get mime type for common image URLs
See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
Supported by Gemini:
application/pdf
audio/mpeg
audio/mp3
audio/wav
image/png
image/jpeg
image/webp
text/plain
video/mov
video/mpeg
video/mp4
video/mpg
video/avi
video/wmv
video/mpegps
video/flv
"""
url = url.lower()
# Map file extensions to mime types
mime_types = {
# Images
(".jpg", ".jpeg"): "image/jpeg",
(".png",): "image/png",
(".webp",): "image/webp",
# Videos
(".mp4",): "video/mp4",
(".mov",): "video/mov",
(".mpeg", ".mpg"): "video/mpeg",
(".avi",): "video/avi",
(".wmv",): "video/wmv",
(".mpegps",): "video/mpegps",
(".flv",): "video/flv",
# Audio
(".mp3",): "audio/mp3",
(".wav",): "audio/wav",
(".mpeg",): "audio/mpeg",
# Documents
(".pdf",): "application/pdf",
(".txt",): "text/plain",
}
# Check each extension group against the URL
for extensions, mime_type in mime_types.items():
if any(url.endswith(ext) for ext in extensions):
return mime_type
return None
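As a quick usage sketch of the relocated helper (the example URLs are hypothetical; matching is by lowercased file extension, and unknown extensions return None):

from litellm.litellm_core_utils.prompt_templates.common_utils import (
    _get_image_mime_type_from_url,
)

assert _get_image_mime_type_from_url("https://example.com/clip.MP4") == "video/mp4"
assert _get_image_mime_type_from_url("https://example.com/doc.pdf") == "application/pdf"
assert _get_image_mime_type_from_url("https://example.com/data.bin") is None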

@@ -2258,6 +2258,14 @@ def _parse_content_type(content_type: str) -> str:
return m.get_content_type()
def _parse_mime_type(base64_data: str) -> Optional[str]:
mime_type_match = re.match(r"data:(.*?);base64", base64_data)
if mime_type_match:
return mime_type_match.group(1)
else:
return None
class BedrockImageProcessor:
"""Handles both sync and async image processing for Bedrock conversations."""

@@ -29,6 +29,7 @@ from litellm.types.llms.anthropic import (
UsageDelta,
)
from litellm.types.llms.openai import (
ChatCompletionRedactedThinkingBlock,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
)
@@ -501,18 +502,19 @@
) -> Tuple[
str,
Optional[ChatCompletionToolCallChunk],
-        List[ChatCompletionThinkingBlock],
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]],
Dict[str, Any],
]:
"""
Helper function to handle the content block delta
"""
text = ""
tool_use: Optional[ChatCompletionToolCallChunk] = None
provider_specific_fields = {}
content_block = ContentBlockDelta(**chunk) # type: ignore
-        thinking_blocks: List[ChatCompletionThinkingBlock] = []
+        thinking_blocks: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
self.content_blocks.append(content_block)
if "text" in content_block["delta"]:
@@ -541,20 +543,25 @@
)
]
provider_specific_fields["thinking_blocks"] = thinking_blocks
return text, tool_use, thinking_blocks, provider_specific_fields
def _handle_reasoning_content(
-        self, thinking_blocks: List[ChatCompletionThinkingBlock]
+        self,
+        thinking_blocks: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ],
) -> Optional[str]:
"""
Handle the reasoning content
"""
reasoning_content = None
for block in thinking_blocks:
+            thinking_content = cast(Optional[str], block.get("thinking"))
             if reasoning_content is None:
                 reasoning_content = ""
-            if "thinking" in block:
-                reasoning_content += block["thinking"]
+            if thinking_content is not None:
+                reasoning_content += thinking_content
return reasoning_content
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
@@ -567,7 +574,13 @@
usage: Optional[Usage] = None
provider_specific_fields: Dict[str, Any] = {}
reasoning_content: Optional[str] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None
index = int(chunk.get("index", 0))
if type_chunk == "content_block_delta":
@@ -605,6 +618,15 @@
},
"index": self.tool_index,
}
elif (
content_block_start["content_block"]["type"] == "redacted_thinking"
):
thinking_blocks = [
ChatCompletionRedactedThinkingBlock(
type="redacted_thinking",
data=content_block_start["content_block"]["data"],
)
]
elif type_chunk == "content_block_stop":
ContentBlockStop(**chunk) # type: ignore
# check if tool call content block
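To make the reworked loop above concrete: redacted blocks have no "thinking" key, so they contribute nothing to reasoning_content. A self-contained sketch with hypothetical sample blocks:

from typing import Optional, cast

# Sample blocks are hypothetical; a redacted block carries only an opaque
# "data" payload and no "thinking" text.
blocks = [
    {"type": "thinking", "thinking": "step 1...", "signature": "sig-abc"},
    {"type": "redacted_thinking", "data": "EuoBCoYBGAIiQ..."},
]

reasoning_content: Optional[str] = None
for block in blocks:
    thinking_content = cast(Optional[str], block.get("thinking"))
    if reasoning_content is None:
        reasoning_content = ""
    if thinking_content is not None:
        reasoning_content += thinking_content

assert reasoning_content == "step 1..."  # redacted block added nothing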

@@ -30,6 +30,7 @@ from litellm.types.llms.openai import (
REASONING_EFFORT,
AllMessageValues,
ChatCompletionCachedContent,
ChatCompletionRedactedThinkingBlock,
ChatCompletionSystemMessage,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
@@ -575,13 +576,21 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
) -> Tuple[
str,
Optional[List[Any]],
-        Optional[List[ChatCompletionThinkingBlock]],
+        Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ],
Optional[str],
List[ChatCompletionToolCallChunk],
]:
text_content = ""
citations: Optional[List[Any]] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None
reasoning_content: Optional[str] = None
tool_calls: List[ChatCompletionToolCallChunk] = []
for idx, content in enumerate(completion_response["content"]):
@@ -600,20 +609,30 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
index=idx,
)
)
-                ## CITATIONS
-                if content.get("citations", None) is not None:
-                    if citations is None:
-                        citations = []
-                    citations.append(content["citations"])
-                if content.get("thinking", None) is not None:
+                elif content.get("thinking", None) is not None:
                     if thinking_blocks is None:
                         thinking_blocks = []
                     thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
+                elif content["type"] == "redacted_thinking":
+                    if thinking_blocks is None:
+                        thinking_blocks = []
+                    thinking_blocks.append(
+                        cast(ChatCompletionRedactedThinkingBlock, content)
+                    )
+
+                ## CITATIONS
+                if content.get("citations") is not None:
+                    if citations is None:
+                        citations = []
+                    citations.append(content["citations"])
if thinking_blocks is not None:
reasoning_content = ""
for block in thinking_blocks:
if "thinking" in block:
reasoning_content += block["thinking"]
thinking_content = cast(Optional[str], block.get("thinking"))
if thinking_content is not None:
reasoning_content += thinking_content
return text_content, citations, thinking_blocks, reasoning_content, tool_calls
def calculate_usage(
@@ -703,7 +722,13 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
else:
text_content = ""
citations: Optional[List[Any]] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None
reasoning_content: Optional[str] = None
tool_calls: List[ChatCompletionToolCallChunk] = []
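For illustration, a hedged sketch of the non-streaming path (the response body is abbreviated; ids, model name, and the redacted "data" payload are placeholders):

from litellm.llms.anthropic.chat.transformation import AnthropicConfig

completion_response = {
    "id": "msg_123",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-7-sonnet-20250219",
    "content": [
        {"type": "redacted_thinking", "data": "EuoBCoYBGAIiQ..."},
        {"type": "text", "text": "Here is my answer."},
    ],
    "stop_reason": "end_turn",
    "stop_sequence": None,
    "usage": {"input_tokens": 10, "output_tokens": 5},
}

config = AnthropicConfig()
text, citations, thinking_blocks, reasoning, tool_calls = config.extract_response_content(
    completion_response
)
assert thinking_blocks is not None
assert thinking_blocks[0]["type"] == "redacted_thinking"
assert reasoning == ""  # redacted blocks expose no readable thinking text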

@@ -22,6 +22,7 @@ from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMExcepti
from litellm.types.llms.bedrock import *
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionRedactedThinkingBlock,
ChatCompletionResponseMessage,
ChatCompletionSystemMessage,
ChatCompletionThinkingBlock,
@@ -627,9 +628,11 @@ class AmazonConverseConfig(BaseConfig):
def _transform_thinking_blocks(
self, thinking_blocks: List[BedrockConverseReasoningContentBlock]
-    ) -> List[ChatCompletionThinkingBlock]:
+    ) -> List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]:
         """Return a consistent format for thinking blocks between Anthropic and Bedrock."""
-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
for block in thinking_blocks:
if "reasoningText" in block:
_thinking_block = ChatCompletionThinkingBlock(type="thinking")
@@ -640,6 +643,11 @@ class AmazonConverseConfig(BaseConfig):
if _signature is not None:
_thinking_block["signature"] = _signature
thinking_blocks_list.append(_thinking_block)
elif "redactedContent" in block:
_redacted_block = ChatCompletionRedactedThinkingBlock(
type="redacted_thinking", data=block["redactedContent"]
)
thinking_blocks_list.append(_redacted_block)
return thinking_blocks_list
def _transform_usage(self, usage: ConverseTokenUsageBlock) -> Usage:

@@ -50,6 +50,7 @@ from litellm.llms.custom_httpx.http_handler import (
)
from litellm.types.llms.bedrock import *
from litellm.types.llms.openai import (
ChatCompletionRedactedThinkingBlock,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
@@ -1255,19 +1256,33 @@ class AWSEventStreamDecoder:
def translate_thinking_blocks(
self, thinking_block: BedrockConverseReasoningContentBlockDelta
-    ) -> Optional[List[ChatCompletionThinkingBlock]]:
+    ) -> Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]:
"""
Translate the thinking blocks to a string
"""
-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
-        _thinking_block = ChatCompletionThinkingBlock(type="thinking")
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
+        _thinking_block: Optional[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = None
         if "text" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["thinking"] = thinking_block["text"]
         elif "signature" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["signature"] = thinking_block["signature"]
             _thinking_block["thinking"] = ""  # consistent with anthropic response
-        thinking_blocks_list.append(_thinking_block)
+        elif "redactedContent" in thinking_block:
+            _thinking_block = ChatCompletionRedactedThinkingBlock(
+                type="redacted_thinking", data=thinking_block["redactedContent"]
+            )
+        if _thinking_block is not None:
+            thinking_blocks_list.append(_thinking_block)
return thinking_blocks_list
def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
@@ -1279,31 +1294,44 @@ class AWSEventStreamDecoder:
usage: Optional[Usage] = None
provider_specific_fields: dict = {}
reasoning_content: Optional[str] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None
index = int(chunk_data.get("contentBlockIndex", 0))
if "start" in chunk_data:
start_obj = ContentBlockStartEvent(**chunk_data["start"])
self.content_blocks = [] # reset
-            if (
-                start_obj is not None
-                and "toolUse" in start_obj
-                and start_obj["toolUse"] is not None
-            ):
-                ## check tool name was formatted by litellm
-                _response_tool_name = start_obj["toolUse"]["name"]
-                response_tool_name = get_bedrock_tool_name(
-                    response_tool_name=_response_tool_name
-                )
-                tool_use = {
-                    "id": start_obj["toolUse"]["toolUseId"],
-                    "type": "function",
-                    "function": {
-                        "name": response_tool_name,
-                        "arguments": "",
-                    },
-                    "index": index,
-                }
+            if start_obj is not None:
+                if "toolUse" in start_obj and start_obj["toolUse"] is not None:
+                    ## check tool name was formatted by litellm
+                    _response_tool_name = start_obj["toolUse"]["name"]
+                    response_tool_name = get_bedrock_tool_name(
+                        response_tool_name=_response_tool_name
+                    )
+                    tool_use = {
+                        "id": start_obj["toolUse"]["toolUseId"],
+                        "type": "function",
+                        "function": {
+                            "name": response_tool_name,
+                            "arguments": "",
+                        },
+                        "index": index,
+                    }
+                elif (
+                    "reasoningContent" in start_obj
+                    and start_obj["reasoningContent"] is not None
+                ):  # redacted thinking can be in start object
+                    thinking_blocks = self.translate_thinking_blocks(
+                        start_obj["reasoningContent"]
+                    )
+                    provider_specific_fields = {
+                        "reasoningContent": start_obj["reasoningContent"],
+                    }
elif "delta" in chunk_data:
delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
self.content_blocks.append(delta_obj)

@@ -37,6 +37,7 @@ from litellm.types.llms.databricks import (
)
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionRedactedThinkingBlock,
ChatCompletionThinkingBlock,
ChatCompletionToolChoiceFunctionParam,
ChatCompletionToolChoiceObjectParam,
@@ -314,13 +315,24 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig):
@staticmethod
def extract_reasoning_content(
content: Optional[AllDatabricksContentValues],
-    ) -> Tuple[Optional[str], Optional[List[ChatCompletionThinkingBlock]]]:
+    ) -> Tuple[
+        Optional[str],
+        Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ],
+    ]:
"""
Extract and return the reasoning content and thinking blocks
"""
if content is None:
return None, None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None
reasoning_content: Optional[str] = None
if isinstance(content, list):
for item in content:

@@ -2,9 +2,19 @@
Translate from OpenAI's `/v1/chat/completions` to VLLM's `/v1/chat/completions`
"""
-from typing import Optional, Tuple
+from typing import List, Optional, Tuple, cast
from litellm.litellm_core_utils.prompt_templates.common_utils import (
_get_image_mime_type_from_url,
)
from litellm.litellm_core_utils.prompt_templates.factory import _parse_mime_type
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionFileObject,
ChatCompletionVideoObject,
ChatCompletionVideoUrlObject,
)
from ....utils import _remove_additional_properties, _remove_strict_from_schema
from ...openai.chat.gpt_transformation import OpenAIGPTConfig
@@ -38,3 +48,71 @@ class HostedVLLMChatConfig(OpenAIGPTConfig):
api_key or get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key"
) # vllm does not require an api key
return api_base, dynamic_api_key
def _is_video_file(self, content_item: ChatCompletionFileObject) -> bool:
"""
Check if the file is a video
- format: video/<extension>
- file_data: base64 encoded video data
- file_id: infer mp4 from extension
"""
file = content_item.get("file", {})
format = file.get("format")
file_data = file.get("file_data")
file_id = file.get("file_id")
if content_item.get("type") != "file":
return False
if format and format.startswith("video/"):
return True
elif file_data:
mime_type = _parse_mime_type(file_data)
if mime_type and mime_type.startswith("video/"):
return True
elif file_id:
mime_type = _get_image_mime_type_from_url(file_id)
if mime_type and mime_type.startswith("video/"):
return True
return False
def _convert_file_to_video_url(
self, content_item: ChatCompletionFileObject
) -> ChatCompletionVideoObject:
file = content_item.get("file", {})
file_id = file.get("file_id")
file_data = file.get("file_data")
if file_id:
return ChatCompletionVideoObject(
type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_id)
)
elif file_data:
return ChatCompletionVideoObject(
type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_data)
)
raise ValueError("file_id or file_data is required")
def _transform_messages(
self, messages: List[AllMessageValues], model: str
) -> List[AllMessageValues]:
"""
Support translating video files from file_id or file_data to video_url
"""
for message in messages:
if message["role"] == "user":
message_content = message.get("content")
if message_content and isinstance(message_content, list):
replaced_content_items: List[
Tuple[int, ChatCompletionFileObject]
] = []
for idx, content_item in enumerate(message_content):
if content_item.get("type") == "file":
content_item = cast(ChatCompletionFileObject, content_item)
if self._is_video_file(content_item):
replaced_content_items.append((idx, content_item))
for idx, content_item in replaced_content_items:
message_content[idx] = self._convert_file_to_video_url(
content_item
)
transformed_messages = super()._transform_messages(messages, model)
return transformed_messages
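A usage sketch for the file_id path; the URL and model name are hypothetical (the test at the bottom of this commit covers the file_data path):

from litellm.llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig

config = HostedVLLMChatConfig()
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this video."},
            # file_id is a plain URL; the .mp4 extension marks it as video
            {"type": "file", "file": {"file_id": "https://example.com/video.mp4"}},
        ],
    }
]
transformed = config._transform_messages(messages=messages, model="hosted_vllm/qwen2-vl")
# The 'file' part is rewritten in place to a vLLM-compatible video part:
# {"type": "video_url", "video_url": {"url": "https://example.com/video.mp4"}}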

@@ -13,6 +13,7 @@ class LiteLLMProxyChatConfig(OpenAIGPTConfig):
def get_supported_openai_params(self, model: str) -> List:
list = super().get_supported_openai_params(model)
list.append("thinking")
list.append("reasoning_effort")
return list
def _map_openai_params(

@@ -12,6 +12,9 @@ from pydantic import BaseModel
import litellm
from litellm._logging import verbose_logger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
_get_image_mime_type_from_url,
)
from litellm.litellm_core_utils.prompt_templates.factory import (
convert_to_anthropic_image_obj,
convert_to_gemini_tool_call_invoke,
@@ -99,62 +102,6 @@ def _process_gemini_image(image_url: str, format: Optional[str] = None) -> PartT
raise e
-def _get_image_mime_type_from_url(url: str) -> Optional[str]:
-    """
-    Get mime type for common image URLs
-    See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
-    Supported by Gemini:
-    application/pdf
-    audio/mpeg
-    audio/mp3
-    audio/wav
-    image/png
-    image/jpeg
-    image/webp
-    text/plain
-    video/mov
-    video/mpeg
-    video/mp4
-    video/mpg
-    video/avi
-    video/wmv
-    video/mpegps
-    video/flv
-    """
-    url = url.lower()
-    # Map file extensions to mime types
-    mime_types = {
-        # Images
-        (".jpg", ".jpeg"): "image/jpeg",
-        (".png",): "image/png",
-        (".webp",): "image/webp",
-        # Videos
-        (".mp4",): "video/mp4",
-        (".mov",): "video/mov",
-        (".mpeg", ".mpg"): "video/mpeg",
-        (".avi",): "video/avi",
-        (".wmv",): "video/wmv",
-        (".mpegps",): "video/mpegps",
-        (".flv",): "video/flv",
-        # Audio
-        (".mp3",): "audio/mp3",
-        (".wav",): "audio/wav",
-        (".mpeg",): "audio/mpeg",
-        # Documents
-        (".pdf",): "application/pdf",
-        (".txt",): "text/plain",
-    }
-    # Check each extension group against the URL
-    for extensions, mime_type in mime_types.items():
-        if any(url.endswith(ext) for ext in extensions):
-            return mime_type
-    return None
def _gemini_convert_messages_with_history( # noqa: PLR0915
messages: List[AllMessageValues],
) -> List[ContentType]:

@@ -179,6 +179,7 @@ class ToolUseBlockStartEvent(TypedDict):
class ContentBlockStartEvent(TypedDict, total=False):
toolUse: Optional[ToolUseBlockStartEvent]
reasoningContent: BedrockConverseReasoningContentBlockDelta
class ContentBlockDeltaEvent(TypedDict, total=False):

@@ -468,6 +468,12 @@ class ChatCompletionThinkingBlock(TypedDict, total=False):
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
class ChatCompletionRedactedThinkingBlock(TypedDict, total=False):
type: Required[Literal["redacted_thinking"]]
data: str
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
class WebSearchOptionsUserLocationApproximate(TypedDict, total=False):
city: str
"""Free text input for the city of the user, e.g. `San Francisco`."""
@@ -797,7 +803,9 @@ class ChatCompletionResponseMessage(TypedDict, total=False):
function_call: Optional[ChatCompletionToolCallFunctionChunk]
provider_specific_fields: Optional[dict]
reasoning_content: Optional[str]
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]
class ChatCompletionUsageBlock(TypedDict):

@@ -29,6 +29,7 @@ from .guardrails import GuardrailEventHooks
from .llms.openai import (
Batch,
ChatCompletionAnnotation,
ChatCompletionRedactedThinkingBlock,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
ChatCompletionUsageBlock,
@@ -552,7 +553,9 @@ class Message(OpenAIObject):
function_call: Optional[FunctionCall]
audio: Optional[ChatCompletionAudioResponse] = None
reasoning_content: Optional[str] = None
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ] = None
provider_specific_fields: Optional[Dict[str, Any]] = Field(
default=None, exclude=True
)
@@ -567,7 +570,11 @@
audio: Optional[ChatCompletionAudioResponse] = None,
provider_specific_fields: Optional[Dict[str, Any]] = None,
reasoning_content: Optional[str] = None,
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None,
annotations: Optional[List[ChatCompletionAnnotation]] = None,
**params,
):
@@ -650,7 +657,9 @@
class Delta(OpenAIObject):
reasoning_content: Optional[str] = None
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ] = None
provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)
def __init__(
@@ -661,7 +670,11 @@
tool_calls=None,
audio: Optional[ChatCompletionAudioResponse] = None,
reasoning_content: Optional[str] = None,
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None,
annotations: Optional[List[ChatCompletionAnnotation]] = None,
**params,
):

@@ -0,0 +1,38 @@
import json
import os
import sys
from unittest.mock import MagicMock
import pytest
from fastapi.testclient import TestClient
sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path
from litellm.llms.anthropic.chat.handler import ModelResponseIterator
def test_redacted_thinking_content_block_delta():
chunk = {
"type": "content_block_start",
"index": 58,
"content_block": {
"type": "redacted_thinking",
"data": "EuoBCoYBGAIiQJ/SxkPAgqxhKok29YrpJHRUJ0OT8ahCHKAwyhmRuUhtdmDX9+mn4gDzKNv3fVpQdB01zEPMzNY3QuTCd+1bdtEqQK6JuKHqdndbwpr81oVWb4wxd1GqF/7Jkw74IlQa27oobX+KuRkopr9Dllt/RDe7Se0sI1IkU7tJIAQCoP46OAwSDF51P09q67xhHlQ3ihoM2aOVlkghq/X0w8NlIjBMNvXYNbjhyrOcIg6kPFn2ed/KK7Cm5prYAtXCwkb4Wr5tUSoSHu9T5hKdJRbr6WsqEc7Lle7FULqMLZGkhqXyc3BA",
},
}
model_response_iterator = ModelResponseIterator(
streaming_response=MagicMock(), sync_stream=False, json_mode=False
)
model_response = model_response_iterator.chunk_parser(chunk=chunk)
print(f"\n\nmodel_response: {model_response}\n\n")
assert model_response.choices[0].delta.thinking_blocks is not None
assert len(model_response.choices[0].delta.thinking_blocks) == 1
print(
f"\n\nmodel_response.choices[0].delta.thinking_blocks[0]: {model_response.choices[0].delta.thinking_blocks[0]}\n\n"
)
assert (
model_response.choices[0].delta.thinking_blocks[0]["type"]
== "redacted_thinking"
)

@@ -56,3 +56,58 @@ def test_calculate_usage():
assert usage.prompt_tokens_details.cached_tokens == 0
assert usage._cache_creation_input_tokens == 12304
assert usage._cache_read_input_tokens == 0
def test_extract_response_content_with_citations():
config = AnthropicConfig()
completion_response = {
"id": "msg_01XrAv7gc5tQNDuoADra7vB4",
"type": "message",
"role": "assistant",
"model": "claude-3-5-sonnet-20241022",
"content": [
{"type": "text", "text": "According to the documents, "},
{
"citations": [
{
"type": "char_location",
"cited_text": "The grass is green. ",
"document_index": 0,
"document_title": "My Document",
"start_char_index": 0,
"end_char_index": 20,
}
],
"type": "text",
"text": "the grass is green",
},
{"type": "text", "text": " and "},
{
"citations": [
{
"type": "char_location",
"cited_text": "The sky is blue.",
"document_index": 0,
"document_title": "My Document",
"start_char_index": 20,
"end_char_index": 36,
}
],
"type": "text",
"text": "the sky is blue",
},
{"type": "text", "text": "."},
],
"stop_reason": "end_turn",
"stop_sequence": None,
"usage": {
"input_tokens": 610,
"cache_creation_input_tokens": 0,
"cache_read_input_tokens": 0,
"output_tokens": 51,
},
}
_, citations, _, _, _ = config.extract_response_content(completion_response)
assert citations is not None

@@ -40,3 +40,22 @@ def test_transform_usage():
)
assert openai_usage._cache_creation_input_tokens == usage["cacheWriteInputTokens"]
assert openai_usage._cache_read_input_tokens == usage["cacheReadInputTokens"]
def test_transform_thinking_blocks_with_redacted_content():
thinking_blocks = [
{
"reasoningText": {
"text": "This is a test",
"signature": "test_signature",
}
},
{
"redactedContent": "This is a redacted content",
},
]
config = AmazonConverseConfig()
transformed_thinking_blocks = config._transform_thinking_blocks(thinking_blocks)
assert len(transformed_thinking_blocks) == 2
assert transformed_thinking_blocks[0]["type"] == "thinking"
assert transformed_thinking_blocks[1]["type"] == "redacted_thinking"

@@ -0,0 +1,22 @@
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path
from unittest.mock import MagicMock, patch
from litellm.llms.bedrock.chat.invoke_handler import AWSEventStreamDecoder
def test_transform_thinking_blocks_with_redacted_content():
thinking_block = {"redactedContent": "This is a redacted content"}
decoder = AWSEventStreamDecoder(model="test")
transformed_thinking_blocks = decoder.translate_thinking_blocks(thinking_block)
assert len(transformed_thinking_blocks) == 1
assert transformed_thinking_blocks[0]["type"] == "redacted_thinking"
assert transformed_thinking_blocks[0]["data"] == "This is a redacted content"

@@ -0,0 +1,45 @@
import json
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path
from litellm.llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig
def test_hosted_vllm_chat_transformation_file_url():
config = HostedVLLMChatConfig()
video_url = "https://example.com/video.mp4"
video_data = f"data:video/mp4;base64,{video_url}"
messages = [
{
"role": "user",
"content": [
{
"type": "file",
"file": {
"file_data": video_data,
},
}
],
}
]
transformed_response = config.transform_request(
model="hosted_vllm/llama-3.1-70b-instruct",
messages=messages,
optional_params={},
litellm_params={},
headers={},
)
assert transformed_response["messages"] == [
{
"role": "user",
"content": [{"type": "video_url", "video_url": {"url": video_data}}],
}
]

File diff suppressed because one or more lines are too long