Support 'file' message type for vLLM video URLs + Anthropic redacted thinking support (#10129)

* feat(hosted_vllm/chat/transformation.py): support calling vLLM video URLs with OpenAI's 'file' message type

allows switching between Gemini/vLLM easily
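
For reference, a minimal sketch of the call this enables (model name, api_base, and URL are placeholders; the 'format' key is the litellm extension already used for Gemini video):

import litellm

# Hedged sketch: the same OpenAI-style 'file' content part previously
# accepted for Gemini now also works against hosted vLLM.
response = litellm.completion(
    model="hosted_vllm/qwen2-vl-7b-instruct",  # placeholder model
    api_base="http://localhost:8000/v1",  # placeholder vLLM endpoint
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize this video."},
                {
                    "type": "file",
                    "file": {
                        "file_id": "https://example.com/video.mp4",
                        "format": "video/mp4",
                    },
                },
            ],
        }
    ],
)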

* [WIP] redacted thinking tests (#9044)

* WIP: redacted thinking tests

* test: add test for redacted thinking in assistant message
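
A hedged sketch of the shape of such a test (Anthropic documents a magic string that deliberately triggers redacted thinking; the model name and assertions here are illustrative, not the repo's actual test):

import litellm

def test_redacted_thinking_round_trip():
    # Anthropic's documented test-only string that forces redacted thinking blocks.
    trigger = (
        "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C1776"
        "46C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB"
    )
    resp = litellm.completion(
        model="anthropic/claude-3-7-sonnet-20250219",  # placeholder model
        messages=[{"role": "user", "content": trigger}],
        thinking={"type": "enabled", "budget_tokens": 1024},
    )
    blocks = resp.choices[0].message.thinking_blocks or []
    assert any(b["type"] == "redacted_thinking" for b in blocks)

The returned assistant message (with its thinking_blocks) can then be appended back to messages to cover the multi-turn assistant-message case.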

---------

Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com>

* fix(anthropic/chat/transformation.py): support redacted thinking block on anthropic completion

Fixes https://github.com/BerriAI/litellm/issues/9058
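
For context, the block being handled looks roughly like this on a non-streaming Anthropic response (opaque data value shortened):

# Hedged sketch of Anthropic's redacted thinking content block; the
# transformation maps it onto ChatCompletionRedactedThinkingBlock.
redacted_block = {
    "type": "redacted_thinking",
    "data": "EmwKAhgBEgy3va3pzix...",  # opaque, encrypted payload
}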

* fix(anthropic/chat/handler.py): transform anthropic redacted messages on streaming

Fixes https://github.com/BerriAI/litellm/issues/9058
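
On streaming, the redacted block arrives whole inside a content_block_start event rather than as deltas, roughly:

# Hedged sketch of the Anthropic SSE event the streaming handler now translates.
content_block_start = {
    "type": "content_block_start",
    "index": 0,
    "content_block": {"type": "redacted_thinking", "data": "EmwKAhgBEgy3..."},
}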

* fix(bedrock/): support redacted text on streaming + non-streaming

Fixes https://github.com/BerriAI/litellm/issues/9058
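
Bedrock's Converse stream carries the equivalent opaque bytes under reasoningContent.redactedContent, and (per the handler change below) it can show up in the content block start object, roughly:

# Hedged sketch of a Bedrock Converse stream chunk with redacted reasoning
# (field layout inferred from the handler change below; value shortened).
bedrock_chunk = {
    "contentBlockIndex": 0,
    "start": {"reasoningContent": {"redactedContent": b"EmwKAhgBEgy3..."}},
}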

* feat(litellm_proxy/chat/transformation.py): support 'reasoning_effort' param for proxy

allows using reasoning effort with thinking models on proxy
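
A hedged usage sketch (api_base, key, and model alias are placeholders):

import litellm

# Hedged sketch: reasoning_effort forwarded through the litellm_proxy provider.
resp = litellm.completion(
    model="litellm_proxy/claude-3-7-sonnet",  # placeholder proxy model alias
    api_base="http://localhost:4000",  # placeholder proxy URL
    api_key="sk-1234",  # placeholder key
    reasoning_effort="high",
    messages=[{"role": "user", "content": "Plan a 3-step experiment."}],
)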

* test: update tests

* fix(utils.py): fix linting error

* fix: fix linting errors

* fix: fix linting errors

* fix: fix linting error

* fix: fix linting errors

* fix(anthropic/chat/transformation.py): fix returning citations in chat completion

---------

Co-authored-by: Johann Miller <22018973+johannkm@users.noreply.github.com>
Krish Dholakia authored on 2025-04-19 11:16:37 -07:00; committed by GitHub
parent 6f5629cf64
commit 72cf30c081
20 changed files with 638 additions and 109 deletions


@@ -50,6 +50,7 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
     ChatCompletionToolCallFunctionChunk,
@@ -1255,19 +1256,33 @@ class AWSEventStreamDecoder:
     def translate_thinking_blocks(
         self, thinking_block: BedrockConverseReasoningContentBlockDelta
-    ) -> Optional[List[ChatCompletionThinkingBlock]]:
+    ) -> Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]:
         """
         Translate the thinking blocks to a string
         """
-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
-        _thinking_block = ChatCompletionThinkingBlock(type="thinking")
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
+        _thinking_block: Optional[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = None
         if "text" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["thinking"] = thinking_block["text"]
         elif "signature" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["signature"] = thinking_block["signature"]
             _thinking_block["thinking"] = ""  # consistent with anthropic response
-        thinking_blocks_list.append(_thinking_block)
+        elif "redactedContent" in thinking_block:
+            _thinking_block = ChatCompletionRedactedThinkingBlock(
+                type="redacted_thinking", data=thinking_block["redactedContent"]
+            )
+        if _thinking_block is not None:
+            thinking_blocks_list.append(_thinking_block)
         return thinking_blocks_list

     def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
@@ -1279,31 +1294,44 @@ class AWSEventStreamDecoder:
         usage: Optional[Usage] = None
         provider_specific_fields: dict = {}
         reasoning_content: Optional[str] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None
         index = int(chunk_data.get("contentBlockIndex", 0))
         if "start" in chunk_data:
             start_obj = ContentBlockStartEvent(**chunk_data["start"])
             self.content_blocks = []  # reset
-            if (
-                start_obj is not None
-                and "toolUse" in start_obj
-                and start_obj["toolUse"] is not None
-            ):
-                ## check tool name was formatted by litellm
-                _response_tool_name = start_obj["toolUse"]["name"]
-                response_tool_name = get_bedrock_tool_name(
-                    response_tool_name=_response_tool_name
-                )
-                tool_use = {
-                    "id": start_obj["toolUse"]["toolUseId"],
-                    "type": "function",
-                    "function": {
-                        "name": response_tool_name,
-                        "arguments": "",
-                    },
-                    "index": index,
-                }
+            if start_obj is not None:
+                if "toolUse" in start_obj and start_obj["toolUse"] is not None:
+                    ## check tool name was formatted by litellm
+                    _response_tool_name = start_obj["toolUse"]["name"]
+                    response_tool_name = get_bedrock_tool_name(
+                        response_tool_name=_response_tool_name
+                    )
+                    tool_use = {
+                        "id": start_obj["toolUse"]["toolUseId"],
+                        "type": "function",
+                        "function": {
+                            "name": response_tool_name,
+                            "arguments": "",
+                        },
+                        "index": index,
+                    }
+                elif (
+                    "reasoningContent" in start_obj
+                    and start_obj["reasoningContent"] is not None
+                ):  # redacted thinking can be in start object
+                    thinking_blocks = self.translate_thinking_blocks(
+                        start_obj["reasoningContent"]
+                    )
+                    provider_specific_fields = {
+                        "reasoningContent": start_obj["reasoningContent"],
+                    }
         elif "delta" in chunk_data:
             delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
             self.content_blocks.append(delta_obj)