mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
Support 'file' message type for VLLM video url's + Anthropic redacted message thinking support (#10129)
* feat(hosted_vllm/chat/transformation.py): support calling vllm video url with openai 'file' message type
  allows switching between gemini/vllm easily
* [WIP] redacted thinking tests (#9044)
  * WIP: redacted thinking tests
  * test: add test for redacted thinking in assistant message
  ---------
  Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com>
* fix(anthropic/chat/transformation.py): support redacted thinking block on anthropic completion
  Fixes https://github.com/BerriAI/litellm/issues/9058
* fix(anthropic/chat/handler.py): transform anthropic redacted messages on streaming
  Fixes https://github.com/BerriAI/litellm/issues/9058
* fix(bedrock/): support redacted text on streaming + non-streaming
  Fixes https://github.com/BerriAI/litellm/issues/9058
* feat(litellm_proxy/chat/transformation.py): support 'reasoning_effort' param for proxy
  allows using reasoning effort with thinking models on proxy
* test: update tests
* fix(utils.py): fix linting error
* fix: fix linting errors
* fix: fix linting errors
* fix: fix linting error
* fix: fix linting errors
* fix(anthropic/chat/transformation.py): fix returning citations in chat completion
---------
Co-authored-by: Johann Miller <22018973+johannkm@users.noreply.github.com>
This commit is contained in:
parent
3c463f6715
commit
f08a4e3c06
20 changed files with 638 additions and 109 deletions
|
@ -50,6 +50,7 @@ from litellm.llms.custom_httpx.http_handler import (
|
|||
)
|
||||
from litellm.types.llms.bedrock import *
|
||||
from litellm.types.llms.openai import (
|
||||
ChatCompletionRedactedThinkingBlock,
|
||||
ChatCompletionThinkingBlock,
|
||||
ChatCompletionToolCallChunk,
|
||||
ChatCompletionToolCallFunctionChunk,
|
||||
|
@ -1255,19 +1256,33 @@ class AWSEventStreamDecoder:
|
|||
|
||||
def translate_thinking_blocks(
|
||||
self, thinking_block: BedrockConverseReasoningContentBlockDelta
|
||||
) -> Optional[List[ChatCompletionThinkingBlock]]:
|
||||
) -> Optional[
|
||||
List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
|
||||
]:
|
||||
"""
|
||||
Translate the thinking blocks to a string
|
||||
"""
|
||||
|
||||
thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
|
||||
_thinking_block = ChatCompletionThinkingBlock(type="thinking")
|
||||
thinking_blocks_list: List[
|
||||
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
|
||||
] = []
|
||||
_thinking_block: Optional[
|
||||
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
|
||||
] = None
|
||||
|
||||
if "text" in thinking_block:
|
||||
_thinking_block = ChatCompletionThinkingBlock(type="thinking")
|
||||
_thinking_block["thinking"] = thinking_block["text"]
|
||||
elif "signature" in thinking_block:
|
||||
_thinking_block = ChatCompletionThinkingBlock(type="thinking")
|
||||
_thinking_block["signature"] = thinking_block["signature"]
|
||||
_thinking_block["thinking"] = "" # consistent with anthropic response
|
||||
thinking_blocks_list.append(_thinking_block)
|
||||
elif "redactedContent" in thinking_block:
|
||||
_thinking_block = ChatCompletionRedactedThinkingBlock(
|
||||
type="redacted_thinking", data=thinking_block["redactedContent"]
|
||||
)
|
||||
if _thinking_block is not None:
|
||||
thinking_blocks_list.append(_thinking_block)
|
||||
return thinking_blocks_list
|
||||
|
||||
def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
|
||||
|
@ -1279,31 +1294,44 @@ class AWSEventStreamDecoder:
|
|||
usage: Optional[Usage] = None
|
||||
provider_specific_fields: dict = {}
|
||||
reasoning_content: Optional[str] = None
|
||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
||||
thinking_blocks: Optional[
|
||||
List[
|
||||
Union[
|
||||
ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
|
||||
]
|
||||
]
|
||||
] = None
|
||||
|
||||
index = int(chunk_data.get("contentBlockIndex", 0))
|
||||
if "start" in chunk_data:
|
||||
start_obj = ContentBlockStartEvent(**chunk_data["start"])
|
||||
self.content_blocks = [] # reset
|
||||
if (
|
||||
start_obj is not None
|
||||
and "toolUse" in start_obj
|
||||
and start_obj["toolUse"] is not None
|
||||
):
|
||||
## check tool name was formatted by litellm
|
||||
_response_tool_name = start_obj["toolUse"]["name"]
|
||||
response_tool_name = get_bedrock_tool_name(
|
||||
response_tool_name=_response_tool_name
|
||||
)
|
||||
tool_use = {
|
||||
"id": start_obj["toolUse"]["toolUseId"],
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": response_tool_name,
|
||||
"arguments": "",
|
||||
},
|
||||
"index": index,
|
||||
}
|
||||
if start_obj is not None:
|
||||
if "toolUse" in start_obj and start_obj["toolUse"] is not None:
|
||||
## check tool name was formatted by litellm
|
||||
_response_tool_name = start_obj["toolUse"]["name"]
|
||||
response_tool_name = get_bedrock_tool_name(
|
||||
response_tool_name=_response_tool_name
|
||||
)
|
||||
tool_use = {
|
||||
"id": start_obj["toolUse"]["toolUseId"],
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": response_tool_name,
|
||||
"arguments": "",
|
||||
},
|
||||
"index": index,
|
||||
}
|
||||
elif (
|
||||
"reasoningContent" in start_obj
|
||||
and start_obj["reasoningContent"] is not None
|
||||
): # redacted thinking can be in start object
|
||||
thinking_blocks = self.translate_thinking_blocks(
|
||||
start_obj["reasoningContent"]
|
||||
)
|
||||
provider_specific_fields = {
|
||||
"reasoningContent": start_obj["reasoningContent"],
|
||||
}
|
||||
elif "delta" in chunk_data:
|
||||
delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
|
||||
self.content_blocks.append(delta_obj)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue