Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 10:44:24 +00:00
Support 'file' message type for VLLM video URLs + Anthropic redacted thinking message support (#10129)
* feat(hosted_vllm/chat/transformation.py): support calling vllm video url with openai 'file' message type

  allows switching between gemini/vllm easily

* [WIP] redacted thinking tests (#9044)

  * WIP: redacted thinking tests

  * test: add test for redacted thinking in assistant message

  ---------

  Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com>

* fix(anthropic/chat/transformation.py): support redacted thinking block on anthropic completion

  Fixes https://github.com/BerriAI/litellm/issues/9058

* fix(anthropic/chat/handler.py): transform anthropic redacted messages on streaming

  Fixes https://github.com/BerriAI/litellm/issues/9058

* fix(bedrock/): support redacted text on streaming + non-streaming

  Fixes https://github.com/BerriAI/litellm/issues/9058

* feat(litellm_proxy/chat/transformation.py): support 'reasoning_effort' param for proxy

  allows using reasoning effort with thinking models on proxy

* test: update tests

* fix(utils.py): fix linting error

* fix: fix linting errors

* fix: fix linting errors

* fix: fix linting error

* fix: fix linting errors

* fix(anthropic/chat/transformation.py): fix returning citations in chat completion

---------

Co-authored-by: Johann Miller <22018973+johannkm@users.noreply.github.com>
This commit is contained in:
parent 6f5629cf64
commit 72cf30c081
20 changed files with 638 additions and 109 deletions
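As context for the headline change, here is a minimal usage sketch of what the new 'file' message type enables for hosted vLLM video inputs. It is a hedged example, not code from this commit: the model name is taken from the new hosted_vllm test further down, and the api_base is a placeholder for your own vLLM endpoint.

    import litellm

    # Placeholder endpoint; point api_base at your own vLLM server.
    response = litellm.completion(
        model="hosted_vllm/llama-3.1-70b-instruct",
        api_base="http://localhost:8000/v1",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Summarize this clip."},
                    # Rewritten by HostedVLLMChatConfig._transform_messages into a
                    # {"type": "video_url", ...} content part before the request is sent.
                    {"type": "file", "file": {"file_id": "https://example.com/video.mp4"}},
                ],
            }
        ],
    )
    print(response.choices[0].message.content)

The same message shape also works for Gemini, which is what the commit message means by switching between gemini/vllm easily.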
@@ -14,6 +14,7 @@ from litellm.types.llms.openai import ChatCompletionThinkingBlock
 from litellm.types.utils import (
     ChatCompletionDeltaToolCall,
     ChatCompletionMessageToolCall,
+    ChatCompletionRedactedThinkingBlock,
     Choices,
     Delta,
     EmbeddingResponse,

@@ -486,7 +487,14 @@ def convert_to_model_response_object(  # noqa: PLR0915
                 )

             # Handle thinking models that display `thinking_blocks` within `content`
-            thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+            thinking_blocks: Optional[
+                List[
+                    Union[
+                        ChatCompletionThinkingBlock,
+                        ChatCompletionRedactedThinkingBlock,
+                    ]
+                ]
+            ] = None
             if "thinking_blocks" in choice["message"]:
                 thinking_blocks = choice["message"]["thinking_blocks"]
                 provider_specific_fields["thinking_blocks"] = thinking_blocks
@@ -471,3 +471,59 @@ def unpack_defs(schema, defs):
             unpack_defs(ref, defs)
             value["items"] = ref
             continue
+
+
+def _get_image_mime_type_from_url(url: str) -> Optional[str]:
+    """
+    Get mime type for common image URLs
+    See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
+
+    Supported by Gemini:
+    application/pdf
+    audio/mpeg
+    audio/mp3
+    audio/wav
+    image/png
+    image/jpeg
+    image/webp
+    text/plain
+    video/mov
+    video/mpeg
+    video/mp4
+    video/mpg
+    video/avi
+    video/wmv
+    video/mpegps
+    video/flv
+    """
+    url = url.lower()
+
+    # Map file extensions to mime types
+    mime_types = {
+        # Images
+        (".jpg", ".jpeg"): "image/jpeg",
+        (".png",): "image/png",
+        (".webp",): "image/webp",
+        # Videos
+        (".mp4",): "video/mp4",
+        (".mov",): "video/mov",
+        (".mpeg", ".mpg"): "video/mpeg",
+        (".avi",): "video/avi",
+        (".wmv",): "video/wmv",
+        (".mpegps",): "video/mpegps",
+        (".flv",): "video/flv",
+        # Audio
+        (".mp3",): "audio/mp3",
+        (".wav",): "audio/wav",
+        (".mpeg",): "audio/mpeg",
+        # Documents
+        (".pdf",): "application/pdf",
+        (".txt",): "text/plain",
+    }
+
+    # Check each extension group against the URL
+    for extensions, mime_type in mime_types.items():
+        if any(url.endswith(ext) for ext in extensions):
+            return mime_type
+
+    return None
@@ -2258,6 +2258,14 @@ def _parse_content_type(content_type: str) -> str:
     return m.get_content_type()


+def _parse_mime_type(base64_data: str) -> Optional[str]:
+    mime_type_match = re.match(r"data:(.*?);base64", base64_data)
+    if mime_type_match:
+        return mime_type_match.group(1)
+    else:
+        return None
+
+
 class BedrockImageProcessor:
     """Handles both sync and async image processing for Bedrock conversations."""

@@ -29,6 +29,7 @@ from litellm.types.llms.anthropic import (
     UsageDelta,
 )
 from litellm.types.llms.openai import (
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
 )

@@ -501,18 +502,19 @@ class ModelResponseIterator:
     ) -> Tuple[
         str,
         Optional[ChatCompletionToolCallChunk],
-        List[ChatCompletionThinkingBlock],
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]],
         Dict[str, Any],
     ]:
         """
         Helper function to handle the content block delta
         """

         text = ""
         tool_use: Optional[ChatCompletionToolCallChunk] = None
         provider_specific_fields = {}
         content_block = ContentBlockDelta(**chunk)  # type: ignore
-        thinking_blocks: List[ChatCompletionThinkingBlock] = []
+        thinking_blocks: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []

         self.content_blocks.append(content_block)
         if "text" in content_block["delta"]:

@@ -541,20 +543,25 @@ class ModelResponseIterator:
                 )
             ]
             provider_specific_fields["thinking_blocks"] = thinking_blocks

         return text, tool_use, thinking_blocks, provider_specific_fields

     def _handle_reasoning_content(
-        self, thinking_blocks: List[ChatCompletionThinkingBlock]
+        self,
+        thinking_blocks: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ],
     ) -> Optional[str]:
         """
         Handle the reasoning content
         """
         reasoning_content = None
         for block in thinking_blocks:
+            thinking_content = cast(Optional[str], block.get("thinking"))
             if reasoning_content is None:
                 reasoning_content = ""
-            if "thinking" in block:
-                reasoning_content += block["thinking"]
+            if thinking_content is not None:
+                reasoning_content += thinking_content
         return reasoning_content

     def chunk_parser(self, chunk: dict) -> ModelResponseStream:

@@ -567,7 +574,13 @@ class ModelResponseIterator:
         usage: Optional[Usage] = None
         provider_specific_fields: Dict[str, Any] = {}
         reasoning_content: Optional[str] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None

         index = int(chunk.get("index", 0))
         if type_chunk == "content_block_delta":

@@ -605,6 +618,15 @@ class ModelResponseIterator:
                     },
                     "index": self.tool_index,
                 }
+            elif (
+                content_block_start["content_block"]["type"] == "redacted_thinking"
+            ):
+                thinking_blocks = [
+                    ChatCompletionRedactedThinkingBlock(
+                        type="redacted_thinking",
+                        data=content_block_start["content_block"]["data"],
+                    )
+                ]
         elif type_chunk == "content_block_stop":
             ContentBlockStop(**chunk)  # type: ignore
             # check if tool call content block
@@ -30,6 +30,7 @@ from litellm.types.llms.openai import (
     REASONING_EFFORT,
     AllMessageValues,
     ChatCompletionCachedContent,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionSystemMessage,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,

@@ -575,13 +576,21 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
     ) -> Tuple[
         str,
         Optional[List[Any]],
-        Optional[List[ChatCompletionThinkingBlock]],
+        Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ],
         Optional[str],
         List[ChatCompletionToolCallChunk],
     ]:
         text_content = ""
         citations: Optional[List[Any]] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None
         reasoning_content: Optional[str] = None
         tool_calls: List[ChatCompletionToolCallChunk] = []
         for idx, content in enumerate(completion_response["content"]):

@@ -600,20 +609,30 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
                         index=idx,
                     )
                 )
-            ## CITATIONS
-            if content.get("citations", None) is not None:
-                if citations is None:
-                    citations = []
-                citations.append(content["citations"])
-            if content.get("thinking", None) is not None:
+            elif content.get("thinking", None) is not None:
                 if thinking_blocks is None:
                     thinking_blocks = []
                 thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
+            elif content["type"] == "redacted_thinking":
+                if thinking_blocks is None:
+                    thinking_blocks = []
+                thinking_blocks.append(
+                    cast(ChatCompletionRedactedThinkingBlock, content)
+                )
+
+            ## CITATIONS
+            if content.get("citations") is not None:
+                if citations is None:
+                    citations = []
+                citations.append(content["citations"])
         if thinking_blocks is not None:
             reasoning_content = ""
             for block in thinking_blocks:
-                if "thinking" in block:
-                    reasoning_content += block["thinking"]
+                thinking_content = cast(Optional[str], block.get("thinking"))
+                if thinking_content is not None:
+                    reasoning_content += thinking_content
+
         return text_content, citations, thinking_blocks, reasoning_content, tool_calls

     def calculate_usage(

@@ -703,7 +722,13 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
         else:
             text_content = ""
             citations: Optional[List[Any]] = None
-            thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+            thinking_blocks: Optional[
+                List[
+                    Union[
+                        ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                    ]
+                ]
+            ] = None
             reasoning_content: Optional[str] = None
             tool_calls: List[ChatCompletionToolCallChunk] = []

@@ -22,6 +22,7 @@ from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMExcepti
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
     AllMessageValues,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionResponseMessage,
     ChatCompletionSystemMessage,
     ChatCompletionThinkingBlock,

@@ -627,9 +628,11 @@ class AmazonConverseConfig(BaseConfig):

     def _transform_thinking_blocks(
         self, thinking_blocks: List[BedrockConverseReasoningContentBlock]
-    ) -> List[ChatCompletionThinkingBlock]:
+    ) -> List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]:
         """Return a consistent format for thinking blocks between Anthropic and Bedrock."""
-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
         for block in thinking_blocks:
             if "reasoningText" in block:
                 _thinking_block = ChatCompletionThinkingBlock(type="thinking")

@@ -640,6 +643,11 @@ class AmazonConverseConfig(BaseConfig):
                 if _signature is not None:
                     _thinking_block["signature"] = _signature
                 thinking_blocks_list.append(_thinking_block)
+            elif "redactedContent" in block:
+                _redacted_block = ChatCompletionRedactedThinkingBlock(
+                    type="redacted_thinking", data=block["redactedContent"]
+                )
+                thinking_blocks_list.append(_redacted_block)
         return thinking_blocks_list

     def _transform_usage(self, usage: ConverseTokenUsageBlock) -> Usage:
@@ -50,6 +50,7 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.types.llms.bedrock import *
 from litellm.types.llms.openai import (
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
     ChatCompletionToolCallFunctionChunk,

@@ -1255,18 +1256,32 @@ class AWSEventStreamDecoder:

     def translate_thinking_blocks(
         self, thinking_block: BedrockConverseReasoningContentBlockDelta
-    ) -> Optional[List[ChatCompletionThinkingBlock]]:
+    ) -> Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]:
         """
         Translate the thinking blocks to a string
         """

-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
-        _thinking_block = ChatCompletionThinkingBlock(type="thinking")
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
+        _thinking_block: Optional[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = None
+
         if "text" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["thinking"] = thinking_block["text"]
         elif "signature" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["signature"] = thinking_block["signature"]
             _thinking_block["thinking"] = ""  # consistent with anthropic response
-        thinking_blocks_list.append(_thinking_block)
+        elif "redactedContent" in thinking_block:
+            _thinking_block = ChatCompletionRedactedThinkingBlock(
+                type="redacted_thinking", data=thinking_block["redactedContent"]
+            )
+        if _thinking_block is not None:
+            thinking_blocks_list.append(_thinking_block)
         return thinking_blocks_list

@@ -1279,17 +1294,20 @@ class AWSEventStreamDecoder:
         usage: Optional[Usage] = None
         provider_specific_fields: dict = {}
         reasoning_content: Optional[str] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None

         index = int(chunk_data.get("contentBlockIndex", 0))
         if "start" in chunk_data:
             start_obj = ContentBlockStartEvent(**chunk_data["start"])
             self.content_blocks = []  # reset
-            if (
-                start_obj is not None
-                and "toolUse" in start_obj
-                and start_obj["toolUse"] is not None
-            ):
+            if start_obj is not None:
+                if "toolUse" in start_obj and start_obj["toolUse"] is not None:
                     ## check tool name was formatted by litellm
                     _response_tool_name = start_obj["toolUse"]["name"]
                     response_tool_name = get_bedrock_tool_name(

@@ -1304,6 +1322,16 @@ class AWSEventStreamDecoder:
                         },
                         "index": index,
                     }
+                elif (
+                    "reasoningContent" in start_obj
+                    and start_obj["reasoningContent"] is not None
+                ):  # redacted thinking can be in start object
+                    thinking_blocks = self.translate_thinking_blocks(
+                        start_obj["reasoningContent"]
+                    )
+                    provider_specific_fields = {
+                        "reasoningContent": start_obj["reasoningContent"],
+                    }
         elif "delta" in chunk_data:
             delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
             self.content_blocks.append(delta_obj)
@@ -37,6 +37,7 @@ from litellm.types.llms.databricks import (
 )
 from litellm.types.llms.openai import (
     AllMessageValues,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolChoiceFunctionParam,
     ChatCompletionToolChoiceObjectParam,

@@ -314,13 +315,24 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig):
     @staticmethod
     def extract_reasoning_content(
         content: Optional[AllDatabricksContentValues],
-    ) -> Tuple[Optional[str], Optional[List[ChatCompletionThinkingBlock]]]:
+    ) -> Tuple[
+        Optional[str],
+        Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ],
+    ]:
         """
         Extract and return the reasoning content and thinking blocks
         """
         if content is None:
             return None, None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None
         reasoning_content: Optional[str] = None
         if isinstance(content, list):
             for item in content:
@@ -2,9 +2,19 @@
 Translate from OpenAI's `/v1/chat/completions` to VLLM's `/v1/chat/completions`
 """

-from typing import Optional, Tuple
+from typing import List, Optional, Tuple, cast

+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    _get_image_mime_type_from_url,
+)
+from litellm.litellm_core_utils.prompt_templates.factory import _parse_mime_type
 from litellm.secret_managers.main import get_secret_str
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionFileObject,
+    ChatCompletionVideoObject,
+    ChatCompletionVideoUrlObject,
+)

 from ....utils import _remove_additional_properties, _remove_strict_from_schema
 from ...openai.chat.gpt_transformation import OpenAIGPTConfig

@@ -38,3 +48,71 @@ class HostedVLLMChatConfig(OpenAIGPTConfig):
             api_key or get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key"
         )  # vllm does not require an api key
         return api_base, dynamic_api_key
+
+    def _is_video_file(self, content_item: ChatCompletionFileObject) -> bool:
+        """
+        Check if the file is a video
+
+        - format: video/<extension>
+        - file_data: base64 encoded video data
+        - file_id: infer mp4 from extension
+        """
+        file = content_item.get("file", {})
+        format = file.get("format")
+        file_data = file.get("file_data")
+        file_id = file.get("file_id")
+        if content_item.get("type") != "file":
+            return False
+        if format and format.startswith("video/"):
+            return True
+        elif file_data:
+            mime_type = _parse_mime_type(file_data)
+            if mime_type and mime_type.startswith("video/"):
+                return True
+        elif file_id:
+            mime_type = _get_image_mime_type_from_url(file_id)
+            if mime_type and mime_type.startswith("video/"):
+                return True
+        return False
+
+    def _convert_file_to_video_url(
+        self, content_item: ChatCompletionFileObject
+    ) -> ChatCompletionVideoObject:
+        file = content_item.get("file", {})
+        file_id = file.get("file_id")
+        file_data = file.get("file_data")
+
+        if file_id:
+            return ChatCompletionVideoObject(
+                type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_id)
+            )
+        elif file_data:
+            return ChatCompletionVideoObject(
+                type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_data)
+            )
+        raise ValueError("file_id or file_data is required")
+
+    def _transform_messages(
+        self, messages: List[AllMessageValues], model: str
+    ) -> List[AllMessageValues]:
+        """
+        Support translating video files from file_id or file_data to video_url
+        """
+        for message in messages:
+            if message["role"] == "user":
+                message_content = message.get("content")
+                if message_content and isinstance(message_content, list):
+                    replaced_content_items: List[
+                        Tuple[int, ChatCompletionFileObject]
+                    ] = []
+                    for idx, content_item in enumerate(message_content):
+                        if content_item.get("type") == "file":
+                            content_item = cast(ChatCompletionFileObject, content_item)
+                            if self._is_video_file(content_item):
+                                replaced_content_items.append((idx, content_item))
+                    for idx, content_item in replaced_content_items:
+                        message_content[idx] = self._convert_file_to_video_url(
+                            content_item
+                        )
+        transformed_messages = super()._transform_messages(messages, model)
+        return transformed_messages
@@ -13,6 +13,7 @@ class LiteLLMProxyChatConfig(OpenAIGPTConfig):
     def get_supported_openai_params(self, model: str) -> List:
         list = super().get_supported_openai_params(model)
         list.append("thinking")
+        list.append("reasoning_effort")
         return list

     def _map_openai_params(
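The hunk above adds reasoning_effort to the OpenAI params the litellm_proxy provider treats as supported, so the value can be forwarded to thinking models sitting behind a LiteLLM proxy. A hedged sketch of the call this permits; the proxy URL, key, and model alias are placeholders, not values from this commit:

    import litellm

    response = litellm.completion(
        model="litellm_proxy/my-thinking-model",  # placeholder alias configured on the proxy
        api_base="http://localhost:4000",         # placeholder proxy URL
        api_key="sk-1234",                        # placeholder proxy key
        reasoning_effort="low",                   # now listed as a supported param for litellm_proxy
        messages=[{"role": "user", "content": "What is 17 * 24?"}],
    )
    # reasoning_content / thinking_blocks may be populated when the underlying model emits thinking.
    print(response.choices[0].message.reasoning_content)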
@@ -12,6 +12,9 @@ from pydantic import BaseModel

 import litellm
 from litellm._logging import verbose_logger
+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    _get_image_mime_type_from_url,
+)
 from litellm.litellm_core_utils.prompt_templates.factory import (
     convert_to_anthropic_image_obj,
     convert_to_gemini_tool_call_invoke,

@@ -99,62 +102,6 @@ def _process_gemini_image(image_url: str, format: Optional[str] = None) -> PartT
         raise e


-def _get_image_mime_type_from_url(url: str) -> Optional[str]:
-    """
-    Get mime type for common image URLs
-    See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
-
-    Supported by Gemini:
-    application/pdf
-    audio/mpeg
-    audio/mp3
-    audio/wav
-    image/png
-    image/jpeg
-    image/webp
-    text/plain
-    video/mov
-    video/mpeg
-    video/mp4
-    video/mpg
-    video/avi
-    video/wmv
-    video/mpegps
-    video/flv
-    """
-    url = url.lower()
-
-    # Map file extensions to mime types
-    mime_types = {
-        # Images
-        (".jpg", ".jpeg"): "image/jpeg",
-        (".png",): "image/png",
-        (".webp",): "image/webp",
-        # Videos
-        (".mp4",): "video/mp4",
-        (".mov",): "video/mov",
-        (".mpeg", ".mpg"): "video/mpeg",
-        (".avi",): "video/avi",
-        (".wmv",): "video/wmv",
-        (".mpegps",): "video/mpegps",
-        (".flv",): "video/flv",
-        # Audio
-        (".mp3",): "audio/mp3",
-        (".wav",): "audio/wav",
-        (".mpeg",): "audio/mpeg",
-        # Documents
-        (".pdf",): "application/pdf",
-        (".txt",): "text/plain",
-    }
-
-    # Check each extension group against the URL
-    for extensions, mime_type in mime_types.items():
-        if any(url.endswith(ext) for ext in extensions):
-            return mime_type
-
-    return None
-
-
 def _gemini_convert_messages_with_history(  # noqa: PLR0915
     messages: List[AllMessageValues],
 ) -> List[ContentType]:
@@ -179,6 +179,7 @@ class ToolUseBlockStartEvent(TypedDict):

 class ContentBlockStartEvent(TypedDict, total=False):
     toolUse: Optional[ToolUseBlockStartEvent]
+    reasoningContent: BedrockConverseReasoningContentBlockDelta


 class ContentBlockDeltaEvent(TypedDict, total=False):

@@ -468,6 +468,12 @@ class ChatCompletionThinkingBlock(TypedDict, total=False):
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]


+class ChatCompletionRedactedThinkingBlock(TypedDict, total=False):
+    type: Required[Literal["redacted_thinking"]]
+    data: str
+    cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
+
+
 class WebSearchOptionsUserLocationApproximate(TypedDict, total=False):
     city: str
     """Free text input for the city of the user, e.g. `San Francisco`."""

@@ -797,7 +803,9 @@ class ChatCompletionResponseMessage(TypedDict, total=False):
     function_call: Optional[ChatCompletionToolCallFunctionChunk]
     provider_specific_fields: Optional[dict]
     reasoning_content: Optional[str]
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]


 class ChatCompletionUsageBlock(TypedDict):
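With ChatCompletionRedactedThinkingBlock defined above, thinking_blocks on a message or delta can now mix the two shapes. A small illustration of how a consumer might handle both; the values are abbreviated stand-ins, not taken from a real response:

    thinking_blocks = [
        # Regular thinking block: readable reasoning text plus a signature.
        {"type": "thinking", "thinking": "First, compare the two documents...", "signature": "EoQBCk..."},
        # Redacted thinking block: only opaque encrypted data is returned.
        {"type": "redacted_thinking", "data": "EuoBCoYBGAIiQ..."},
    ]

    for block in thinking_blocks:
        if block["type"] == "thinking":
            print(block["thinking"])
        else:
            # No readable content; these blocks are surfaced to the caller as-is.
            print(f"[redacted thinking block, {len(block['data'])} chars of opaque data]")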
@@ -29,6 +29,7 @@ from .guardrails import GuardrailEventHooks
 from .llms.openai import (
     Batch,
     ChatCompletionAnnotation,
+    ChatCompletionRedactedThinkingBlock,
     ChatCompletionThinkingBlock,
     ChatCompletionToolCallChunk,
     ChatCompletionUsageBlock,

@@ -552,7 +553,9 @@ class Message(OpenAIObject):
     function_call: Optional[FunctionCall]
     audio: Optional[ChatCompletionAudioResponse] = None
     reasoning_content: Optional[str] = None
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ] = None
     provider_specific_fields: Optional[Dict[str, Any]] = Field(
         default=None, exclude=True
     )

@@ -567,7 +570,11 @@ class Message(OpenAIObject):
         audio: Optional[ChatCompletionAudioResponse] = None,
         provider_specific_fields: Optional[Dict[str, Any]] = None,
         reasoning_content: Optional[str] = None,
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None,
         annotations: Optional[List[ChatCompletionAnnotation]] = None,
         **params,
     ):

@@ -650,7 +657,9 @@ class Message(OpenAIObject):

 class Delta(OpenAIObject):
     reasoning_content: Optional[str] = None
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ] = None
     provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)

     def __init__(

@@ -661,7 +670,11 @@ class Delta(OpenAIObject):
         tool_calls=None,
         audio: Optional[ChatCompletionAudioResponse] = None,
         reasoning_content: Optional[str] = None,
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None,
         annotations: Optional[List[ChatCompletionAnnotation]] = None,
         **params,
     ):
@@ -0,0 +1,38 @@
+import json
+import os
+import sys
+from unittest.mock import MagicMock
+
+import pytest
+from fastapi.testclient import TestClient
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+
+from litellm.llms.anthropic.chat.handler import ModelResponseIterator
+
+
+def test_redacted_thinking_content_block_delta():
+    chunk = {
+        "type": "content_block_start",
+        "index": 58,
+        "content_block": {
+            "type": "redacted_thinking",
+            "data": "EuoBCoYBGAIiQJ/SxkPAgqxhKok29YrpJHRUJ0OT8ahCHKAwyhmRuUhtdmDX9+mn4gDzKNv3fVpQdB01zEPMzNY3QuTCd+1bdtEqQK6JuKHqdndbwpr81oVWb4wxd1GqF/7Jkw74IlQa27oobX+KuRkopr9Dllt/RDe7Se0sI1IkU7tJIAQCoP46OAwSDF51P09q67xhHlQ3ihoM2aOVlkghq/X0w8NlIjBMNvXYNbjhyrOcIg6kPFn2ed/KK7Cm5prYAtXCwkb4Wr5tUSoSHu9T5hKdJRbr6WsqEc7Lle7FULqMLZGkhqXyc3BA",
+        },
+    }
+    model_response_iterator = ModelResponseIterator(
+        streaming_response=MagicMock(), sync_stream=False, json_mode=False
+    )
+    model_response = model_response_iterator.chunk_parser(chunk=chunk)
+    print(f"\n\nmodel_response: {model_response}\n\n")
+    assert model_response.choices[0].delta.thinking_blocks is not None
+    assert len(model_response.choices[0].delta.thinking_blocks) == 1
+    print(
+        f"\n\nmodel_response.choices[0].delta.thinking_blocks[0]: {model_response.choices[0].delta.thinking_blocks[0]}\n\n"
+    )
+    assert (
+        model_response.choices[0].delta.thinking_blocks[0]["type"]
+        == "redacted_thinking"
+    )
@@ -56,3 +56,58 @@ def test_calculate_usage():
     assert usage.prompt_tokens_details.cached_tokens == 0
     assert usage._cache_creation_input_tokens == 12304
     assert usage._cache_read_input_tokens == 0
+
+
+def test_extract_response_content_with_citations():
+    config = AnthropicConfig()
+
+    completion_response = {
+        "id": "msg_01XrAv7gc5tQNDuoADra7vB4",
+        "type": "message",
+        "role": "assistant",
+        "model": "claude-3-5-sonnet-20241022",
+        "content": [
+            {"type": "text", "text": "According to the documents, "},
+            {
+                "citations": [
+                    {
+                        "type": "char_location",
+                        "cited_text": "The grass is green. ",
+                        "document_index": 0,
+                        "document_title": "My Document",
+                        "start_char_index": 0,
+                        "end_char_index": 20,
+                    }
+                ],
+                "type": "text",
+                "text": "the grass is green",
+            },
+            {"type": "text", "text": " and "},
+            {
+                "citations": [
+                    {
+                        "type": "char_location",
+                        "cited_text": "The sky is blue.",
+                        "document_index": 0,
+                        "document_title": "My Document",
+                        "start_char_index": 20,
+                        "end_char_index": 36,
+                    }
+                ],
+                "type": "text",
+                "text": "the sky is blue",
+            },
+            {"type": "text", "text": "."},
+        ],
+        "stop_reason": "end_turn",
+        "stop_sequence": None,
+        "usage": {
+            "input_tokens": 610,
+            "cache_creation_input_tokens": 0,
+            "cache_read_input_tokens": 0,
+            "output_tokens": 51,
+        },
+    }
+
+    _, citations, _, _, _ = config.extract_response_content(completion_response)
+    assert citations is not None

@@ -40,3 +40,22 @@ def test_transform_usage():
     )
     assert openai_usage._cache_creation_input_tokens == usage["cacheWriteInputTokens"]
     assert openai_usage._cache_read_input_tokens == usage["cacheReadInputTokens"]
+
+
+def test_transform_thinking_blocks_with_redacted_content():
+    thinking_blocks = [
+        {
+            "reasoningText": {
+                "text": "This is a test",
+                "signature": "test_signature",
+            }
+        },
+        {
+            "redactedContent": "This is a redacted content",
+        },
+    ]
+    config = AmazonConverseConfig()
+    transformed_thinking_blocks = config._transform_thinking_blocks(thinking_blocks)
+    assert len(transformed_thinking_blocks) == 2
+    assert transformed_thinking_blocks[0]["type"] == "thinking"
+    assert transformed_thinking_blocks[1]["type"] == "redacted_thinking"
tests/litellm/llms/bedrock/chat/test_invoke_handler.py (new file, 22 lines)

@@ -0,0 +1,22 @@
+import json
+import os
+import sys
+
+import pytest
+from fastapi.testclient import TestClient
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+from unittest.mock import MagicMock, patch
+
+from litellm.llms.bedrock.chat.invoke_handler import AWSEventStreamDecoder
+
+
+def test_transform_thinking_blocks_with_redacted_content():
+    thinking_block = {"redactedContent": "This is a redacted content"}
+    decoder = AWSEventStreamDecoder(model="test")
+    transformed_thinking_blocks = decoder.translate_thinking_blocks(thinking_block)
+    assert len(transformed_thinking_blocks) == 1
+    assert transformed_thinking_blocks[0]["type"] == "redacted_thinking"
+    assert transformed_thinking_blocks[0]["data"] == "This is a redacted content"
@@ -0,0 +1,45 @@
+import json
+import os
+import sys
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+
+from litellm.llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig
+
+
+def test_hosted_vllm_chat_transformation_file_url():
+    config = HostedVLLMChatConfig()
+    video_url = "https://example.com/video.mp4"
+    video_data = f"data:video/mp4;base64,{video_url}"
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "file",
+                    "file": {
+                        "file_data": video_data,
+                    },
+                }
+            ],
+        }
+    ]
+    transformed_response = config.transform_request(
+        model="hosted_vllm/llama-3.1-70b-instruct",
+        messages=messages,
+        optional_params={},
+        litellm_params={},
+        headers={},
+    )
+    assert transformed_response["messages"] == [
+        {
+            "role": "user",
+            "content": [{"type": "video_url", "video_url": {"url": video_data}}],
+        }
+    ]
File diff suppressed because one or more lines are too long