Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-24 18:24:20 +00:00)
Support 'file' message type for VLLM video URLs + Anthropic redacted message thinking support (#10129)
* feat(hosted_vllm/chat/transformation.py): support calling vllm video url with openai 'file' message type
  allows switching between gemini/vllm easily
* [WIP] redacted thinking tests (#9044)
  * WIP: redacted thinking tests
  * test: add test for redacted thinking in assistant message
  Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com>
* fix(anthropic/chat/transformation.py): support redacted thinking block on anthropic completion
  Fixes https://github.com/BerriAI/litellm/issues/9058
* fix(anthropic/chat/handler.py): transform anthropic redacted messages on streaming
  Fixes https://github.com/BerriAI/litellm/issues/9058
* fix(bedrock/): support redacted text on streaming + non-streaming
  Fixes https://github.com/BerriAI/litellm/issues/9058
* feat(litellm_proxy/chat/transformation.py): support 'reasoning_effort' param for proxy
  allows using reasoning effort with thinking models on proxy
* test: update tests
* fix(utils.py): fix linting error
* fix: fix linting errors
* fix: fix linting errors
* fix: fix linting error
* fix: fix linting errors
* fix(anthropic/chat/transformation.py): fix returning citations in chat completion

Co-authored-by: Johann Miller <22018973+johannkm@users.noreply.github.com>
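A minimal usage sketch of the new 'file' content type for video with a hosted vLLM model (the model alias, API base, and video URL are placeholders; the message shape mirrors the hosted_vllm test added in this commit):

    import litellm

    # Placeholder model alias, endpoint, and URL -- only the message shape matters here.
    response = litellm.completion(
        model="hosted_vllm/llama-3.1-70b-instruct",
        api_base="http://localhost:8000/v1",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Summarize this clip."},
                    {
                        # New OpenAI-style 'file' item; litellm rewrites it to
                        # vLLM's video_url format when the mime type is video/*.
                        "type": "file",
                        "file": {"file_id": "https://example.com/video.mp4"},
                    },
                ],
            }
        ],
    )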
parent 3c463f6715
commit f08a4e3c06
20 changed files with 638 additions and 109 deletions
@@ -14,6 +14,7 @@ from litellm.types.llms.openai import ChatCompletionThinkingBlock
 from litellm.types.utils import (
     ChatCompletionDeltaToolCall,
     ChatCompletionMessageToolCall,
+    ChatCompletionRedactedThinkingBlock,
     Choices,
     Delta,
     EmbeddingResponse,
@@ -486,7 +487,14 @@ def convert_to_model_response_object( # noqa: PLR0915
             )

             # Handle thinking models that display `thinking_blocks` within `content`
-            thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+            thinking_blocks: Optional[
+                List[
+                    Union[
+                        ChatCompletionThinkingBlock,
+                        ChatCompletionRedactedThinkingBlock,
+                    ]
+                ]
+            ] = None
             if "thinking_blocks" in choice["message"]:
                 thinking_blocks = choice["message"]["thinking_blocks"]
                 provider_specific_fields["thinking_blocks"] = thinking_blocks
@@ -471,3 +471,59 @@ def unpack_defs(schema, defs):
             unpack_defs(ref, defs)
             value["items"] = ref
             continue
+
+
+def _get_image_mime_type_from_url(url: str) -> Optional[str]:
+    """
+    Get mime type for common image URLs
+    See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
+
+    Supported by Gemini:
+    application/pdf
+    audio/mpeg
+    audio/mp3
+    audio/wav
+    image/png
+    image/jpeg
+    image/webp
+    text/plain
+    video/mov
+    video/mpeg
+    video/mp4
+    video/mpg
+    video/avi
+    video/wmv
+    video/mpegps
+    video/flv
+    """
+    url = url.lower()
+
+    # Map file extensions to mime types
+    mime_types = {
+        # Images
+        (".jpg", ".jpeg"): "image/jpeg",
+        (".png",): "image/png",
+        (".webp",): "image/webp",
+        # Videos
+        (".mp4",): "video/mp4",
+        (".mov",): "video/mov",
+        (".mpeg", ".mpg"): "video/mpeg",
+        (".avi",): "video/avi",
+        (".wmv",): "video/wmv",
+        (".mpegps",): "video/mpegps",
+        (".flv",): "video/flv",
+        # Audio
+        (".mp3",): "audio/mp3",
+        (".wav",): "audio/wav",
+        (".mpeg",): "audio/mpeg",
+        # Documents
+        (".pdf",): "application/pdf",
+        (".txt",): "text/plain",
+    }
+
+    # Check each extension group against the URL
+    for extensions, mime_type in mime_types.items():
+        if any(url.endswith(ext) for ext in extensions):
+            return mime_type
+
+    return None
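For reference, a small usage sketch of the helper added above (URLs are placeholders; the import path matches the new imports introduced later in this diff):

    from litellm.litellm_core_utils.prompt_templates.common_utils import (
        _get_image_mime_type_from_url,
    )

    # Matching is case-insensitive and based purely on the URL's file extension.
    assert _get_image_mime_type_from_url("https://example.com/clip.MP4") == "video/mp4"
    assert _get_image_mime_type_from_url("https://example.com/photo.jpeg") == "image/jpeg"
    assert _get_image_mime_type_from_url("https://example.com/data.bin") is None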
@@ -2258,6 +2258,14 @@ def _parse_content_type(content_type: str) -> str:
     return m.get_content_type()


+def _parse_mime_type(base64_data: str) -> Optional[str]:
+    mime_type_match = re.match(r"data:(.*?);base64", base64_data)
+    if mime_type_match:
+        return mime_type_match.group(1)
+    else:
+        return None
+
+
 class BedrockImageProcessor:
     """Handles both sync and async image processing for Bedrock conversations."""
@@ -29,6 +29,7 @@ from litellm.types.llms.anthropic import (
     UsageDelta,
 )
 from litellm.types.llms.openai import (
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
 )
@@ -501,18 +502,19 @@ class ModelResponseIterator:
     ) -> Tuple[
         str,
         Optional[ChatCompletionToolCallChunk],
-        List[ChatCompletionThinkingBlock],
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]],
         Dict[str, Any],
     ]:
         """
         Helper function to handle the content block delta
         """

         text = ""
         tool_use: Optional[ChatCompletionToolCallChunk] = None
         provider_specific_fields = {}
         content_block = ContentBlockDelta(**chunk) # type: ignore
-        thinking_blocks: List[ChatCompletionThinkingBlock] = []
+        thinking_blocks: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []

         self.content_blocks.append(content_block)
         if "text" in content_block["delta"]:
@@ -541,20 +543,25 @@ class ModelResponseIterator:
                 )
             ]
             provider_specific_fields["thinking_blocks"] = thinking_blocks

         return text, tool_use, thinking_blocks, provider_specific_fields

     def _handle_reasoning_content(
-        self, thinking_blocks: List[ChatCompletionThinkingBlock]
+        self,
+        thinking_blocks: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ],
     ) -> Optional[str]:
         """
         Handle the reasoning content
         """
         reasoning_content = None
         for block in thinking_blocks:
+            thinking_content = cast(Optional[str], block.get("thinking"))
             if reasoning_content is None:
                 reasoning_content = ""
-            if "thinking" in block:
-                reasoning_content += block["thinking"]
+            if thinking_content is not None:
+                reasoning_content += thinking_content
         return reasoning_content

     def chunk_parser(self, chunk: dict) -> ModelResponseStream:
@@ -567,7 +574,13 @@ class ModelResponseIterator:
         usage: Optional[Usage] = None
         provider_specific_fields: Dict[str, Any] = {}
         reasoning_content: Optional[str] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None

         index = int(chunk.get("index", 0))
         if type_chunk == "content_block_delta":
@@ -605,6 +618,15 @@ class ModelResponseIterator:
                     },
                     "index": self.tool_index,
                 }
+            elif (
+                content_block_start["content_block"]["type"] == "redacted_thinking"
+            ):
+                thinking_blocks = [
+                    ChatCompletionRedactedThinkingBlock(
+                        type="redacted_thinking",
+                        data=content_block_start["content_block"]["data"],
+                    )
+                ]
         elif type_chunk == "content_block_stop":
             ContentBlockStop(**chunk) # type: ignore
             # check if tool call content block
@@ -30,6 +30,7 @@ from litellm.types.llms.openai import (
     REASONING_EFFORT,
     AllMessageValues,
     ChatCompletionCachedContent,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionSystemMessage,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
@@ -575,13 +576,21 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
     ) -> Tuple[
         str,
         Optional[List[Any]],
-        Optional[List[ChatCompletionThinkingBlock]],
+        Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ],
         Optional[str],
         List[ChatCompletionToolCallChunk],
     ]:
         text_content = ""
         citations: Optional[List[Any]] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None
         reasoning_content: Optional[str] = None
         tool_calls: List[ChatCompletionToolCallChunk] = []
         for idx, content in enumerate(completion_response["content"]):
@@ -600,20 +609,30 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
                         index=idx,
                     )
                 )
-            ## CITATIONS
-            if content.get("citations", None) is not None:
-                if citations is None:
-                    citations = []
-                citations.append(content["citations"])
-            if content.get("thinking", None) is not None:
+            elif content.get("thinking", None) is not None:
                 if thinking_blocks is None:
                     thinking_blocks = []
                 thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
+            elif content["type"] == "redacted_thinking":
+                if thinking_blocks is None:
+                    thinking_blocks = []
+                thinking_blocks.append(
+                    cast(ChatCompletionRedactedThinkingBlock, content)
+                )
+
+            ## CITATIONS
+            if content.get("citations") is not None:
+                if citations is None:
+                    citations = []
+                citations.append(content["citations"])
         if thinking_blocks is not None:
             reasoning_content = ""
             for block in thinking_blocks:
-                if "thinking" in block:
-                    reasoning_content += block["thinking"]
+                thinking_content = cast(Optional[str], block.get("thinking"))
+                if thinking_content is not None:
+                    reasoning_content += thinking_content

         return text_content, citations, thinking_blocks, reasoning_content, tool_calls

     def calculate_usage(
@@ -703,7 +722,13 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
         else:
             text_content = ""
             citations: Optional[List[Any]] = None
-            thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+            thinking_blocks: Optional[
+                List[
+                    Union[
+                        ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                    ]
+                ]
+            ] = None
             reasoning_content: Optional[str] = None
             tool_calls: List[ChatCompletionToolCallChunk] = []
@@ -22,6 +22,7 @@ from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMExcepti
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
     AllMessageValues,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionResponseMessage,
     ChatCompletionSystemMessage,
     ChatCompletionThinkingBlock,
@@ -627,9 +628,11 @@ class AmazonConverseConfig(BaseConfig):

     def _transform_thinking_blocks(
         self, thinking_blocks: List[BedrockConverseReasoningContentBlock]
-    ) -> List[ChatCompletionThinkingBlock]:
+    ) -> List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]:
         """Return a consistent format for thinking blocks between Anthropic and Bedrock."""
-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
         for block in thinking_blocks:
             if "reasoningText" in block:
                 _thinking_block = ChatCompletionThinkingBlock(type="thinking")
@@ -640,6 +643,11 @@ class AmazonConverseConfig(BaseConfig):
                 if _signature is not None:
                     _thinking_block["signature"] = _signature
                 thinking_blocks_list.append(_thinking_block)
+            elif "redactedContent" in block:
+                _redacted_block = ChatCompletionRedactedThinkingBlock(
+                    type="redacted_thinking", data=block["redactedContent"]
+                )
+                thinking_blocks_list.append(_redacted_block)
         return thinking_blocks_list

     def _transform_usage(self, usage: ConverseTokenUsageBlock) -> Usage:
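For orientation, a hedged sketch of what the widened transform now returns for mixed blocks (the import path is assumed; the Bedrock test added later in this diff exercises the same case):

    # Import path assumed for illustration only.
    from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig

    blocks = AmazonConverseConfig()._transform_thinking_blocks(
        [
            {"reasoningText": {"text": "step one", "signature": "sig"}},
            {"redactedContent": "opaque-payload"},
        ]
    )
    # Text blocks stay "thinking"; redactedContent becomes a redacted_thinking block.
    assert [b["type"] for b in blocks] == ["thinking", "redacted_thinking"]
    assert blocks[1]["data"] == "opaque-payload"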
@@ -50,6 +50,7 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
     ChatCompletionToolCallFunctionChunk,
@@ -1255,19 +1256,33 @@ class AWSEventStreamDecoder:

     def translate_thinking_blocks(
         self, thinking_block: BedrockConverseReasoningContentBlockDelta
-    ) -> Optional[List[ChatCompletionThinkingBlock]]:
+    ) -> Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]:
         """
         Translate the thinking blocks to a string
         """

-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
-        _thinking_block = ChatCompletionThinkingBlock(type="thinking")
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
+        _thinking_block: Optional[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = None

         if "text" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["thinking"] = thinking_block["text"]
         elif "signature" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["signature"] = thinking_block["signature"]
             _thinking_block["thinking"] = "" # consistent with anthropic response
-        thinking_blocks_list.append(_thinking_block)
+        elif "redactedContent" in thinking_block:
+            _thinking_block = ChatCompletionRedactedThinkingBlock(
+                type="redacted_thinking", data=thinking_block["redactedContent"]
+            )
+        if _thinking_block is not None:
+            thinking_blocks_list.append(_thinking_block)
         return thinking_blocks_list

     def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
@@ -1279,31 +1294,44 @@ class AWSEventStreamDecoder:
         usage: Optional[Usage] = None
         provider_specific_fields: dict = {}
         reasoning_content: Optional[str] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None

         index = int(chunk_data.get("contentBlockIndex", 0))
         if "start" in chunk_data:
             start_obj = ContentBlockStartEvent(**chunk_data["start"])
             self.content_blocks = [] # reset
-            if (
-                start_obj is not None
-                and "toolUse" in start_obj
-                and start_obj["toolUse"] is not None
-            ):
-                ## check tool name was formatted by litellm
-                _response_tool_name = start_obj["toolUse"]["name"]
-                response_tool_name = get_bedrock_tool_name(
-                    response_tool_name=_response_tool_name
-                )
-                tool_use = {
-                    "id": start_obj["toolUse"]["toolUseId"],
-                    "type": "function",
-                    "function": {
-                        "name": response_tool_name,
-                        "arguments": "",
-                    },
-                    "index": index,
-                }
+            if start_obj is not None:
+                if "toolUse" in start_obj and start_obj["toolUse"] is not None:
+                    ## check tool name was formatted by litellm
+                    _response_tool_name = start_obj["toolUse"]["name"]
+                    response_tool_name = get_bedrock_tool_name(
+                        response_tool_name=_response_tool_name
+                    )
+                    tool_use = {
+                        "id": start_obj["toolUse"]["toolUseId"],
+                        "type": "function",
+                        "function": {
+                            "name": response_tool_name,
+                            "arguments": "",
+                        },
+                        "index": index,
+                    }
+                elif (
+                    "reasoningContent" in start_obj
+                    and start_obj["reasoningContent"] is not None
+                ): # redacted thinking can be in start object
+                    thinking_blocks = self.translate_thinking_blocks(
+                        start_obj["reasoningContent"]
+                    )
+                    provider_specific_fields = {
+                        "reasoningContent": start_obj["reasoningContent"],
+                    }
         elif "delta" in chunk_data:
             delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
             self.content_blocks.append(delta_obj)
@@ -37,6 +37,7 @@ from litellm.types.llms.databricks import (
 )
 from litellm.types.llms.openai import (
     AllMessageValues,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolChoiceFunctionParam,
     ChatCompletionToolChoiceObjectParam,
@@ -314,13 +315,24 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig):
     @staticmethod
     def extract_reasoning_content(
         content: Optional[AllDatabricksContentValues],
-    ) -> Tuple[Optional[str], Optional[List[ChatCompletionThinkingBlock]]]:
+    ) -> Tuple[
+        Optional[str],
+        Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ],
+    ]:
         """
         Extract and return the reasoning content and thinking blocks
         """
         if content is None:
             return None, None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None
         reasoning_content: Optional[str] = None
         if isinstance(content, list):
             for item in content:
@@ -2,9 +2,19 @@
 Translate from OpenAI's `/v1/chat/completions` to VLLM's `/v1/chat/completions`
 """

-from typing import Optional, Tuple
+from typing import List, Optional, Tuple, cast

+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    _get_image_mime_type_from_url,
+)
+from litellm.litellm_core_utils.prompt_templates.factory import _parse_mime_type
 from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionFileObject,
+    ChatCompletionVideoObject,
+    ChatCompletionVideoUrlObject,
+)

 from ....utils import _remove_additional_properties, _remove_strict_from_schema
 from ...openai.chat.gpt_transformation import OpenAIGPTConfig
@@ -38,3 +48,71 @@ class HostedVLLMChatConfig(OpenAIGPTConfig):
             api_key or get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key"
         ) # vllm does not require an api key
         return api_base, dynamic_api_key
+
+    def _is_video_file(self, content_item: ChatCompletionFileObject) -> bool:
+        """
+        Check if the file is a video
+
+        - format: video/<extension>
+        - file_data: base64 encoded video data
+        - file_id: infer mp4 from extension
+        """
+        file = content_item.get("file", {})
+        format = file.get("format")
+        file_data = file.get("file_data")
+        file_id = file.get("file_id")
+        if content_item.get("type") != "file":
+            return False
+        if format and format.startswith("video/"):
+            return True
+        elif file_data:
+            mime_type = _parse_mime_type(file_data)
+            if mime_type and mime_type.startswith("video/"):
+                return True
+        elif file_id:
+            mime_type = _get_image_mime_type_from_url(file_id)
+            if mime_type and mime_type.startswith("video/"):
+                return True
+        return False
+
+    def _convert_file_to_video_url(
+        self, content_item: ChatCompletionFileObject
+    ) -> ChatCompletionVideoObject:
+        file = content_item.get("file", {})
+        file_id = file.get("file_id")
+        file_data = file.get("file_data")
+
+        if file_id:
+            return ChatCompletionVideoObject(
+                type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_id)
+            )
+        elif file_data:
+            return ChatCompletionVideoObject(
+                type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_data)
+            )
+        raise ValueError("file_id or file_data is required")
+
+    def _transform_messages(
+        self, messages: List[AllMessageValues], model: str
+    ) -> List[AllMessageValues]:
+        """
+        Support translating video files from file_id or file_data to video_url
+        """
+        for message in messages:
+            if message["role"] == "user":
+                message_content = message.get("content")
+                if message_content and isinstance(message_content, list):
+                    replaced_content_items: List[
+                        Tuple[int, ChatCompletionFileObject]
+                    ] = []
+                    for idx, content_item in enumerate(message_content):
+                        if content_item.get("type") == "file":
+                            content_item = cast(ChatCompletionFileObject, content_item)
+                            if self._is_video_file(content_item):
+                                replaced_content_items.append((idx, content_item))
+                    for idx, content_item in replaced_content_items:
+                        message_content[idx] = self._convert_file_to_video_url(
+                            content_item
+                        )
+        transformed_messages = super()._transform_messages(messages, model)
+        return transformed_messages
@@ -13,6 +13,7 @@ class LiteLLMProxyChatConfig(OpenAIGPTConfig):
     def get_supported_openai_params(self, model: str) -> List:
         list = super().get_supported_openai_params(model)
         list.append("thinking")
+        list.append("reasoning_effort")
         return list

     def _map_openai_params(
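A hedged sketch of the newly allowed proxy parameter (the proxy URL, key, and model alias below are placeholders, not values from this commit):

    import litellm

    # 'litellm_proxy/' routes the call through a running LiteLLM proxy;
    # reasoning_effort is now forwarded to thinking-capable models behind it.
    response = litellm.completion(
        model="litellm_proxy/claude-3-7-sonnet",
        api_base="http://localhost:4000",
        api_key="sk-1234",
        messages=[{"role": "user", "content": "Think through this step by step."}],
        reasoning_effort="low",
    )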
@@ -12,6 +12,9 @@ from pydantic import BaseModel

 import litellm
 from litellm._logging import verbose_logger
+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    _get_image_mime_type_from_url,
+)
 from litellm.litellm_core_utils.prompt_templates.factory import (
     convert_to_anthropic_image_obj,
     convert_to_gemini_tool_call_invoke,
@@ -99,62 +102,6 @@ def _process_gemini_image(image_url: str, format: Optional[str] = None) -> PartT
         raise e


-def _get_image_mime_type_from_url(url: str) -> Optional[str]:
-    """
-    Get mime type for common image URLs
-    See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
-
-    Supported by Gemini:
-    application/pdf
-    audio/mpeg
-    audio/mp3
-    audio/wav
-    image/png
-    image/jpeg
-    image/webp
-    text/plain
-    video/mov
-    video/mpeg
-    video/mp4
-    video/mpg
-    video/avi
-    video/wmv
-    video/mpegps
-    video/flv
-    """
-    url = url.lower()
-
-    # Map file extensions to mime types
-    mime_types = {
-        # Images
-        (".jpg", ".jpeg"): "image/jpeg",
-        (".png",): "image/png",
-        (".webp",): "image/webp",
-        # Videos
-        (".mp4",): "video/mp4",
-        (".mov",): "video/mov",
-        (".mpeg", ".mpg"): "video/mpeg",
-        (".avi",): "video/avi",
-        (".wmv",): "video/wmv",
-        (".mpegps",): "video/mpegps",
-        (".flv",): "video/flv",
-        # Audio
-        (".mp3",): "audio/mp3",
-        (".wav",): "audio/wav",
-        (".mpeg",): "audio/mpeg",
-        # Documents
-        (".pdf",): "application/pdf",
-        (".txt",): "text/plain",
-    }
-
-    # Check each extension group against the URL
-    for extensions, mime_type in mime_types.items():
-        if any(url.endswith(ext) for ext in extensions):
-            return mime_type
-
-    return None
-
-
 def _gemini_convert_messages_with_history( # noqa: PLR0915
     messages: List[AllMessageValues],
 ) -> List[ContentType]:
@@ -179,6 +179,7 @@ class ToolUseBlockStartEvent(TypedDict):

 class ContentBlockStartEvent(TypedDict, total=False):
     toolUse: Optional[ToolUseBlockStartEvent]
+    reasoningContent: BedrockConverseReasoningContentBlockDelta


 class ContentBlockDeltaEvent(TypedDict, total=False):
@@ -468,6 +468,12 @@ class ChatCompletionThinkingBlock(TypedDict, total=False):
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


+class ChatCompletionRedactedThinkingBlock(TypedDict, total=False):
+    type: Required[Literal["redacted_thinking"]]
+    data: str
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
 class WebSearchOptionsUserLocationApproximate(TypedDict, total=False):
     city: str
     """Free text input for the city of the user, e.g. `San Francisco`."""
@@ -797,7 +803,9 @@ class ChatCompletionResponseMessage(TypedDict, total=False):
     function_call: Optional[ChatCompletionToolCallFunctionChunk]
     provider_specific_fields: Optional[dict]
     reasoning_content: Optional[str]
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]


 class ChatCompletionUsageBlock(TypedDict):
@@ -29,6 +29,7 @@ from .guardrails import GuardrailEventHooks
 from .llms.openai import (
     Batch,
     ChatCompletionAnnotation,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
     ChatCompletionUsageBlock,
@@ -552,7 +553,9 @@ class Message(OpenAIObject):
     function_call: Optional[FunctionCall]
     audio: Optional[ChatCompletionAudioResponse] = None
     reasoning_content: Optional[str] = None
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ] = None
     provider_specific_fields: Optional[Dict[str, Any]] = Field(
         default=None, exclude=True
     )
@@ -567,7 +570,11 @@ class Message(OpenAIObject):
         audio: Optional[ChatCompletionAudioResponse] = None,
         provider_specific_fields: Optional[Dict[str, Any]] = None,
         reasoning_content: Optional[str] = None,
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None,
         annotations: Optional[List[ChatCompletionAnnotation]] = None,
         **params,
     ):
@@ -650,7 +657,9 @@ class Message(OpenAIObject):

 class Delta(OpenAIObject):
     reasoning_content: Optional[str] = None
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ] = None
     provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)

     def __init__(
@@ -661,7 +670,11 @@ class Delta(OpenAIObject):
         tool_calls=None,
         audio: Optional[ChatCompletionAudioResponse] = None,
         reasoning_content: Optional[str] = None,
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None,
         annotations: Optional[List[ChatCompletionAnnotation]] = None,
         **params,
     ):
@@ -0,0 +1,38 @@
+import json
+import os
+import sys
+from unittest.mock import MagicMock
+
+import pytest
+from fastapi.testclient import TestClient
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+) # Adds the parent directory to the system path
+
+from litellm.llms.anthropic.chat.handler import ModelResponseIterator
+
+
+def test_redacted_thinking_content_block_delta():
+    chunk = {
+        "type": "content_block_start",
+        "index": 58,
+        "content_block": {
+            "type": "redacted_thinking",
+            "data": "EuoBCoYBGAIiQJ/SxkPAgqxhKok29YrpJHRUJ0OT8ahCHKAwyhmRuUhtdmDX9+mn4gDzKNv3fVpQdB01zEPMzNY3QuTCd+1bdtEqQK6JuKHqdndbwpr81oVWb4wxd1GqF/7Jkw74IlQa27oobX+KuRkopr9Dllt/RDe7Se0sI1IkU7tJIAQCoP46OAwSDF51P09q67xhHlQ3ihoM2aOVlkghq/X0w8NlIjBMNvXYNbjhyrOcIg6kPFn2ed/KK7Cm5prYAtXCwkb4Wr5tUSoSHu9T5hKdJRbr6WsqEc7Lle7FULqMLZGkhqXyc3BA",
+        },
+    }
+    model_response_iterator = ModelResponseIterator(
+        streaming_response=MagicMock(), sync_stream=False, json_mode=False
+    )
+    model_response = model_response_iterator.chunk_parser(chunk=chunk)
+    print(f"\n\nmodel_response: {model_response}\n\n")
+    assert model_response.choices[0].delta.thinking_blocks is not None
+    assert len(model_response.choices[0].delta.thinking_blocks) == 1
+    print(
+        f"\n\nmodel_response.choices[0].delta.thinking_blocks[0]: {model_response.choices[0].delta.thinking_blocks[0]}\n\n"
+    )
+    assert (
+        model_response.choices[0].delta.thinking_blocks[0]["type"]
+        == "redacted_thinking"
+    )
@@ -56,3 +56,58 @@ def test_calculate_usage():
     assert usage.prompt_tokens_details.cached_tokens == 0
     assert usage._cache_creation_input_tokens == 12304
     assert usage._cache_read_input_tokens == 0
+
+
+def test_extract_response_content_with_citations():
+    config = AnthropicConfig()
+
+    completion_response = {
+        "id": "msg_01XrAv7gc5tQNDuoADra7vB4",
+        "type": "message",
+        "role": "assistant",
+        "model": "claude-3-5-sonnet-20241022",
+        "content": [
+            {"type": "text", "text": "According to the documents, "},
+            {
+                "citations": [
+                    {
+                        "type": "char_location",
+                        "cited_text": "The grass is green. ",
+                        "document_index": 0,
+                        "document_title": "My Document",
+                        "start_char_index": 0,
+                        "end_char_index": 20,
+                    }
+                ],
+                "type": "text",
+                "text": "the grass is green",
+            },
+            {"type": "text", "text": " and "},
+            {
+                "citations": [
+                    {
+                        "type": "char_location",
+                        "cited_text": "The sky is blue.",
+                        "document_index": 0,
+                        "document_title": "My Document",
+                        "start_char_index": 20,
+                        "end_char_index": 36,
+                    }
+                ],
+                "type": "text",
+                "text": "the sky is blue",
+            },
+            {"type": "text", "text": "."},
+        ],
+        "stop_reason": "end_turn",
+        "stop_sequence": None,
+        "usage": {
+            "input_tokens": 610,
+            "cache_creation_input_tokens": 0,
+            "cache_read_input_tokens": 0,
+            "output_tokens": 51,
+        },
+    }
+
+    _, citations, _, _, _ = config.extract_response_content(completion_response)
+    assert citations is not None
@@ -40,3 +40,22 @@ def test_transform_usage():
     )
     assert openai_usage._cache_creation_input_tokens == usage["cacheWriteInputTokens"]
     assert openai_usage._cache_read_input_tokens == usage["cacheReadInputTokens"]
+
+
+def test_transform_thinking_blocks_with_redacted_content():
+    thinking_blocks = [
+        {
+            "reasoningText": {
+                "text": "This is a test",
+                "signature": "test_signature",
+            }
+        },
+        {
+            "redactedContent": "This is a redacted content",
+        },
+    ]
+    config = AmazonConverseConfig()
+    transformed_thinking_blocks = config._transform_thinking_blocks(thinking_blocks)
+    assert len(transformed_thinking_blocks) == 2
+    assert transformed_thinking_blocks[0]["type"] == "thinking"
+    assert transformed_thinking_blocks[1]["type"] == "redacted_thinking"
tests/litellm/llms/bedrock/chat/test_invoke_handler.py (new file, 22 lines)
@@ -0,0 +1,22 @@
+import json
+import os
+import sys
+
+import pytest
+from fastapi.testclient import TestClient
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+) # Adds the parent directory to the system path
+from unittest.mock import MagicMock, patch
+
+from litellm.llms.bedrock.chat.invoke_handler import AWSEventStreamDecoder
+
+
+def test_transform_thinking_blocks_with_redacted_content():
+    thinking_block = {"redactedContent": "This is a redacted content"}
+    decoder = AWSEventStreamDecoder(model="test")
+    transformed_thinking_blocks = decoder.translate_thinking_blocks(thinking_block)
+    assert len(transformed_thinking_blocks) == 1
+    assert transformed_thinking_blocks[0]["type"] == "redacted_thinking"
+    assert transformed_thinking_blocks[0]["data"] == "This is a redacted content"
@@ -0,0 +1,45 @@
+import json
+import os
+import sys
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+) # Adds the parent directory to the system path
+
+from litellm.llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig
+
+
+def test_hosted_vllm_chat_transformation_file_url():
+    config = HostedVLLMChatConfig()
+    video_url = "https://example.com/video.mp4"
+    video_data = f"data:video/mp4;base64,{video_url}"
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "file",
+                    "file": {
+                        "file_data": video_data,
+                    },
+                }
+            ],
+        }
+    ]
+    transformed_response = config.transform_request(
+        model="hosted_vllm/llama-3.1-70b-instruct",
+        messages=messages,
+        optional_params={},
+        litellm_params={},
+        headers={},
+    )
+    assert transformed_response["messages"] == [
+        {
+            "role": "user",
+            "content": [{"type": "video_url", "video_url": {"url": video_data}}],
+        }
+    ]
File diff suppressed because one or more lines are too long