Support 'file' message type for vLLM video URLs + Anthropic redacted thinking support (#10129)

* feat(hosted_vllm/chat/transformation.py): support calling vLLM video URLs with the OpenAI 'file' message type

allows switching between Gemini and vLLM easily

* [WIP] redacted thinking tests (#9044)

* WIP: redacted thinking tests

* test: add test for redacted thinking in assistant message

---------

Co-authored-by: Krish Dholakia <krrishdholakia@gmail.com>

* fix(anthropic/chat/transformation.py): support redacted thinking block on anthropic completion

Fixes https://github.com/BerriAI/litellm/issues/9058

* fix(anthropic/chat/handler.py): transform anthropic redacted messages on streaming

Fixes https://github.com/BerriAI/litellm/issues/9058

* fix(bedrock/): support redacted text on streaming + non-streaming

Fixes https://github.com/BerriAI/litellm/issues/9058

* feat(litellm_proxy/chat/transformation.py): support 'reasoning_effort' param for proxy

allows using reasoning_effort with thinking models on the proxy

* test: update tests

* fix(utils.py): fix linting error

* fix: fix linting errors

* fix: fix linting errors

* fix: fix linting error

* fix: fix linting errors

* fix(anthropic/chat/transformation.py): fix returning citations in chat completion

---------

Co-authored-by: Johann Miller <22018973+johannkm@users.noreply.github.com>
Krish Dholakia 2025-04-19 11:16:37 -07:00 committed by GitHub
parent 3c463f6715
commit f08a4e3c06
20 changed files with 638 additions and 109 deletions

@@ -14,6 +14,7 @@ from litellm.types.llms.openai import ChatCompletionThinkingBlock
from litellm.types.utils import (
ChatCompletionDeltaToolCall,
ChatCompletionMessageToolCall,
ChatCompletionRedactedThinkingBlock,
Choices,
Delta,
EmbeddingResponse,
@@ -486,7 +487,14 @@ def convert_to_model_response_object( # noqa: PLR0915
)
# Handle thinking models that display `thinking_blocks` within `content`
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock,
+                    ChatCompletionRedactedThinkingBlock,
+                ]
+            ]
+        ] = None
if "thinking_blocks" in choice["message"]:
thinking_blocks = choice["message"]["thinking_blocks"]
provider_specific_fields["thinking_blocks"] = thinking_blocks

@@ -471,3 +471,59 @@ def unpack_defs(schema, defs)
unpack_defs(ref, defs)
value["items"] = ref
continue
def _get_image_mime_type_from_url(url: str) -> Optional[str]:
"""
Get mime type for common image URLs
See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
Supported by Gemini:
application/pdf
audio/mpeg
audio/mp3
audio/wav
image/png
image/jpeg
image/webp
text/plain
video/mov
video/mpeg
video/mp4
video/mpg
video/avi
video/wmv
video/mpegps
video/flv
"""
url = url.lower()
# Map file extensions to mime types
mime_types = {
# Images
(".jpg", ".jpeg"): "image/jpeg",
(".png",): "image/png",
(".webp",): "image/webp",
# Videos
(".mp4",): "video/mp4",
(".mov",): "video/mov",
(".mpeg", ".mpg"): "video/mpeg",
(".avi",): "video/avi",
(".wmv",): "video/wmv",
(".mpegps",): "video/mpegps",
(".flv",): "video/flv",
# Audio
(".mp3",): "audio/mp3",
(".wav",): "audio/wav",
(".mpeg",): "audio/mpeg",
# Documents
(".pdf",): "application/pdf",
(".txt",): "text/plain",
}
# Check each extension group against the URL
for extensions, mime_type in mime_types.items():
if any(url.endswith(ext) for ext in extensions):
return mime_type
return None
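As a quick usage sketch of the relocated helper (the example URLs are hypothetical; matching is by lowercased file extension, and unknown extensions return None):

from litellm.litellm_core_utils.prompt_templates.common_utils import (
    _get_image_mime_type_from_url,
)

assert _get_image_mime_type_from_url("https://example.com/clip.MP4") == "video/mp4"
assert _get_image_mime_type_from_url("https://example.com/doc.pdf") == "application/pdf"
assert _get_image_mime_type_from_url("https://example.com/data.bin") is None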

@@ -2258,6 +2258,14 @@ def _parse_content_type(content_type: str) -> str:
return m.get_content_type()
def _parse_mime_type(base64_data: str) -> Optional[str]:
mime_type_match = re.match(r"data:(.*?);base64", base64_data)
if mime_type_match:
return mime_type_match.group(1)
else:
return None
class BedrockImageProcessor:
"""Handles both sync and async image processing for Bedrock conversations."""

@@ -29,6 +29,7 @@ from litellm.types.llms.anthropic import (
UsageDelta,
)
from litellm.types.llms.openai import (
ChatCompletionRedactedThinkingBlock,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
)
@@ -501,18 +502,19 @@
) -> Tuple[
str,
Optional[ChatCompletionToolCallChunk],
-        List[ChatCompletionThinkingBlock],
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]],
Dict[str, Any],
]:
"""
Helper function to handle the content block delta
"""
text = ""
tool_use: Optional[ChatCompletionToolCallChunk] = None
provider_specific_fields = {}
content_block = ContentBlockDelta(**chunk) # type: ignore
-        thinking_blocks: List[ChatCompletionThinkingBlock] = []
+        thinking_blocks: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
self.content_blocks.append(content_block)
if "text" in content_block["delta"]:
@@ -541,20 +543,25 @@
)
]
provider_specific_fields["thinking_blocks"] = thinking_blocks
return text, tool_use, thinking_blocks, provider_specific_fields
def _handle_reasoning_content(
-        self, thinking_blocks: List[ChatCompletionThinkingBlock]
+        self,
+        thinking_blocks: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ],
) -> Optional[str]:
"""
Handle the reasoning content
"""
reasoning_content = None
for block in thinking_blocks:
+            thinking_content = cast(Optional[str], block.get("thinking"))
             if reasoning_content is None:
                 reasoning_content = ""
-            if "thinking" in block:
-                reasoning_content += block["thinking"]
+            if thinking_content is not None:
+                reasoning_content += thinking_content
return reasoning_content
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
@@ -567,7 +574,13 @@
usage: Optional[Usage] = None
provider_specific_fields: Dict[str, Any] = {}
reasoning_content: Optional[str] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None
index = int(chunk.get("index", 0))
if type_chunk == "content_block_delta":
@@ -605,6 +618,15 @@
},
"index": self.tool_index,
}
elif (
content_block_start["content_block"]["type"] == "redacted_thinking"
):
thinking_blocks = [
ChatCompletionRedactedThinkingBlock(
type="redacted_thinking",
data=content_block_start["content_block"]["data"],
)
]
elif type_chunk == "content_block_stop":
ContentBlockStop(**chunk) # type: ignore
# check if tool call content block
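To make the reworked loop above concrete: redacted blocks have no "thinking" key, so they contribute nothing to reasoning_content. A self-contained sketch with hypothetical sample blocks:

from typing import Optional, cast

# Sample blocks are hypothetical; a redacted block carries only an opaque
# "data" payload and no "thinking" text.
blocks = [
    {"type": "thinking", "thinking": "step 1...", "signature": "sig-abc"},
    {"type": "redacted_thinking", "data": "EuoBCoYBGAIiQ..."},
]

reasoning_content: Optional[str] = None
for block in blocks:
    thinking_content = cast(Optional[str], block.get("thinking"))
    if reasoning_content is None:
        reasoning_content = ""
    if thinking_content is not None:
        reasoning_content += thinking_content

assert reasoning_content == "step 1..."  # redacted block added nothing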

@@ -30,6 +30,7 @@ from litellm.types.llms.openai import (
REASONING_EFFORT,
AllMessageValues,
ChatCompletionCachedContent,
ChatCompletionRedactedThinkingBlock,
ChatCompletionSystemMessage,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
@@ -575,13 +576,21 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
) -> Tuple[
str,
Optional[List[Any]],
-        Optional[List[ChatCompletionThinkingBlock]],
+        Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ],
Optional[str],
List[ChatCompletionToolCallChunk],
]:
text_content = ""
citations: Optional[List[Any]] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None
reasoning_content: Optional[str] = None
tool_calls: List[ChatCompletionToolCallChunk] = []
for idx, content in enumerate(completion_response["content"]):
@@ -600,20 +609,30 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
index=idx,
)
)
-                ## CITATIONS
-                if content.get("citations", None) is not None:
-                    if citations is None:
-                        citations = []
-                    citations.append(content["citations"])
-                if content.get("thinking", None) is not None:
+                elif content.get("thinking", None) is not None:
                     if thinking_blocks is None:
                         thinking_blocks = []
                     thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
+                elif content["type"] == "redacted_thinking":
+                    if thinking_blocks is None:
+                        thinking_blocks = []
+                    thinking_blocks.append(
+                        cast(ChatCompletionRedactedThinkingBlock, content)
+                    )
+
+                ## CITATIONS
+                if content.get("citations") is not None:
+                    if citations is None:
+                        citations = []
+                    citations.append(content["citations"])
if thinking_blocks is not None:
reasoning_content = ""
for block in thinking_blocks:
if "thinking" in block:
reasoning_content += block["thinking"]
thinking_content = cast(Optional[str], block.get("thinking"))
if thinking_content is not None:
reasoning_content += thinking_content
return text_content, citations, thinking_blocks, reasoning_content, tool_calls
def calculate_usage(
@@ -703,7 +722,13 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
else:
text_content = ""
citations: Optional[List[Any]] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None
reasoning_content: Optional[str] = None
tool_calls: List[ChatCompletionToolCallChunk] = []
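For illustration, a hedged sketch of the non-streaming path (the response body is abbreviated; ids, model name, and the redacted "data" payload are placeholders):

from litellm.llms.anthropic.chat.transformation import AnthropicConfig

completion_response = {
    "id": "msg_123",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-7-sonnet-20250219",
    "content": [
        {"type": "redacted_thinking", "data": "EuoBCoYBGAIiQ..."},
        {"type": "text", "text": "Here is my answer."},
    ],
    "stop_reason": "end_turn",
    "stop_sequence": None,
    "usage": {"input_tokens": 10, "output_tokens": 5},
}

config = AnthropicConfig()
text, citations, thinking_blocks, reasoning, tool_calls = config.extract_response_content(
    completion_response
)
assert thinking_blocks is not None
assert thinking_blocks[0]["type"] == "redacted_thinking"
assert reasoning == ""  # redacted blocks expose no readable thinking text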

@@ -22,6 +22,7 @@ from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMExcepti
from litellm.types.llms.bedrock import *
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionRedactedThinkingBlock,
ChatCompletionResponseMessage,
ChatCompletionSystemMessage,
ChatCompletionThinkingBlock,
@@ -627,9 +628,11 @@ class AmazonConverseConfig(BaseConfig):
def _transform_thinking_blocks(
self, thinking_blocks: List[BedrockConverseReasoningContentBlock]
-    ) -> List[ChatCompletionThinkingBlock]:
+    ) -> List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]:
         """Return a consistent format for thinking blocks between Anthropic and Bedrock."""
-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
for block in thinking_blocks:
if "reasoningText" in block:
_thinking_block = ChatCompletionThinkingBlock(type="thinking")
@@ -640,6 +643,11 @@ class AmazonConverseConfig(BaseConfig):
if _signature is not None:
_thinking_block["signature"] = _signature
thinking_blocks_list.append(_thinking_block)
elif "redactedContent" in block:
_redacted_block = ChatCompletionRedactedThinkingBlock(
type="redacted_thinking", data=block["redactedContent"]
)
thinking_blocks_list.append(_redacted_block)
return thinking_blocks_list
def _transform_usage(self, usage: ConverseTokenUsageBlock) -> Usage:

@@ -50,6 +50,7 @@ from litellm.llms.custom_httpx.http_handler import (
)
from litellm.types.llms.bedrock import *
from litellm.types.llms.openai import (
ChatCompletionRedactedThinkingBlock,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
@@ -1255,19 +1256,33 @@ class AWSEventStreamDecoder:
def translate_thinking_blocks(
self, thinking_block: BedrockConverseReasoningContentBlockDelta
-    ) -> Optional[List[ChatCompletionThinkingBlock]]:
+    ) -> Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]:
"""
Translate the thinking blocks to a string
"""
-        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
-        _thinking_block = ChatCompletionThinkingBlock(type="thinking")
+        thinking_blocks_list: List[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = []
+        _thinking_block: Optional[
+            Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+        ] = None
         if "text" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["thinking"] = thinking_block["text"]
         elif "signature" in thinking_block:
+            _thinking_block = ChatCompletionThinkingBlock(type="thinking")
             _thinking_block["signature"] = thinking_block["signature"]
             _thinking_block["thinking"] = ""  # consistent with anthropic response
-        thinking_blocks_list.append(_thinking_block)
+        elif "redactedContent" in thinking_block:
+            _thinking_block = ChatCompletionRedactedThinkingBlock(
+                type="redacted_thinking", data=thinking_block["redactedContent"]
+            )
+        if _thinking_block is not None:
+            thinking_blocks_list.append(_thinking_block)
return thinking_blocks_list
def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
@@ -1279,31 +1294,44 @@ class AWSEventStreamDecoder:
usage: Optional[Usage] = None
provider_specific_fields: dict = {}
reasoning_content: Optional[str] = None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[
+                    ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
+                ]
+            ]
+        ] = None
index = int(chunk_data.get("contentBlockIndex", 0))
if "start" in chunk_data:
start_obj = ContentBlockStartEvent(**chunk_data["start"])
self.content_blocks = [] # reset
-            if (
-                start_obj is not None
-                and "toolUse" in start_obj
-                and start_obj["toolUse"] is not None
-            ):
-                ## check tool name was formatted by litellm
-                _response_tool_name = start_obj["toolUse"]["name"]
-                response_tool_name = get_bedrock_tool_name(
-                    response_tool_name=_response_tool_name
-                )
-                tool_use = {
-                    "id": start_obj["toolUse"]["toolUseId"],
-                    "type": "function",
-                    "function": {
-                        "name": response_tool_name,
-                        "arguments": "",
-                    },
-                    "index": index,
-                }
+            if start_obj is not None:
+                if "toolUse" in start_obj and start_obj["toolUse"] is not None:
+                    ## check tool name was formatted by litellm
+                    _response_tool_name = start_obj["toolUse"]["name"]
+                    response_tool_name = get_bedrock_tool_name(
+                        response_tool_name=_response_tool_name
+                    )
+                    tool_use = {
+                        "id": start_obj["toolUse"]["toolUseId"],
+                        "type": "function",
+                        "function": {
+                            "name": response_tool_name,
+                            "arguments": "",
+                        },
+                        "index": index,
+                    }
+                elif (
+                    "reasoningContent" in start_obj
+                    and start_obj["reasoningContent"] is not None
+                ):  # redacted thinking can be in start object
+                    thinking_blocks = self.translate_thinking_blocks(
+                        start_obj["reasoningContent"]
+                    )
+                    provider_specific_fields = {
+                        "reasoningContent": start_obj["reasoningContent"],
+                    }
elif "delta" in chunk_data:
delta_obj = ContentBlockDeltaEvent(**chunk_data["delta"])
self.content_blocks.append(delta_obj)

@@ -37,6 +37,7 @@ from litellm.types.llms.databricks import (
)
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionRedactedThinkingBlock,
ChatCompletionThinkingBlock,
ChatCompletionToolChoiceFunctionParam,
ChatCompletionToolChoiceObjectParam,
@@ -314,13 +315,24 @@ class DatabricksConfig(DatabricksBase, OpenAILikeChatConfig, AnthropicConfig):
@staticmethod
def extract_reasoning_content(
content: Optional[AllDatabricksContentValues],
-    ) -> Tuple[Optional[str], Optional[List[ChatCompletionThinkingBlock]]]:
+    ) -> Tuple[
+        Optional[str],
+        Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ],
+    ]:
"""
Extract and return the reasoning content and thinking blocks
"""
if content is None:
return None, None
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None
reasoning_content: Optional[str] = None
if isinstance(content, list):
for item in content:

@@ -2,9 +2,19 @@
Translate from OpenAI's `/v1/chat/completions` to VLLM's `/v1/chat/completions`
"""
-from typing import Optional, Tuple
+from typing import List, Optional, Tuple, cast
from litellm.litellm_core_utils.prompt_templates.common_utils import (
_get_image_mime_type_from_url,
)
from litellm.litellm_core_utils.prompt_templates.factory import _parse_mime_type
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionFileObject,
ChatCompletionVideoObject,
ChatCompletionVideoUrlObject,
)
from ....utils import _remove_additional_properties, _remove_strict_from_schema
from ...openai.chat.gpt_transformation import OpenAIGPTConfig
@@ -38,3 +48,71 @@ class HostedVLLMChatConfig(OpenAIGPTConfig):
api_key or get_secret_str("HOSTED_VLLM_API_KEY") or "fake-api-key"
) # vllm does not require an api key
return api_base, dynamic_api_key
def _is_video_file(self, content_item: ChatCompletionFileObject) -> bool:
"""
Check if the file is a video
- format: video/<extension>
- file_data: base64 encoded video data
- file_id: infer mp4 from extension
"""
file = content_item.get("file", {})
format = file.get("format")
file_data = file.get("file_data")
file_id = file.get("file_id")
if content_item.get("type") != "file":
return False
if format and format.startswith("video/"):
return True
elif file_data:
mime_type = _parse_mime_type(file_data)
if mime_type and mime_type.startswith("video/"):
return True
elif file_id:
mime_type = _get_image_mime_type_from_url(file_id)
if mime_type and mime_type.startswith("video/"):
return True
return False
def _convert_file_to_video_url(
self, content_item: ChatCompletionFileObject
) -> ChatCompletionVideoObject:
file = content_item.get("file", {})
file_id = file.get("file_id")
file_data = file.get("file_data")
if file_id:
return ChatCompletionVideoObject(
type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_id)
)
elif file_data:
return ChatCompletionVideoObject(
type="video_url", video_url=ChatCompletionVideoUrlObject(url=file_data)
)
raise ValueError("file_id or file_data is required")
def _transform_messages(
self, messages: List[AllMessageValues], model: str
) -> List[AllMessageValues]:
"""
Support translating video files from file_id or file_data to video_url
"""
for message in messages:
if message["role"] == "user":
message_content = message.get("content")
if message_content and isinstance(message_content, list):
replaced_content_items: List[
Tuple[int, ChatCompletionFileObject]
] = []
for idx, content_item in enumerate(message_content):
if content_item.get("type") == "file":
content_item = cast(ChatCompletionFileObject, content_item)
if self._is_video_file(content_item):
replaced_content_items.append((idx, content_item))
for idx, content_item in replaced_content_items:
message_content[idx] = self._convert_file_to_video_url(
content_item
)
transformed_messages = super()._transform_messages(messages, model)
return transformed_messages
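A usage sketch for the file_id path; the URL and model name are hypothetical (the test at the bottom of this commit covers the file_data path):

from litellm.llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig

config = HostedVLLMChatConfig()
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this video."},
            # file_id is a plain URL; the .mp4 extension marks it as video
            {"type": "file", "file": {"file_id": "https://example.com/video.mp4"}},
        ],
    }
]
transformed = config._transform_messages(messages=messages, model="hosted_vllm/qwen2-vl")
# The 'file' part is rewritten in place to a vLLM-compatible video part:
# {"type": "video_url", "video_url": {"url": "https://example.com/video.mp4"}}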

@@ -13,6 +13,7 @@ class LiteLLMProxyChatConfig(OpenAIGPTConfig):
def get_supported_openai_params(self, model: str) -> List:
list = super().get_supported_openai_params(model)
list.append("thinking")
list.append("reasoning_effort")
return list
def _map_openai_params(

@@ -12,6 +12,9 @@ from pydantic import BaseModel
import litellm
from litellm._logging import verbose_logger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
_get_image_mime_type_from_url,
)
from litellm.litellm_core_utils.prompt_templates.factory import (
convert_to_anthropic_image_obj,
convert_to_gemini_tool_call_invoke,
@@ -99,62 +102,6 @@ def _process_gemini_image(image_url: str, format: Optional[str] = None) -> PartT
raise e
-def _get_image_mime_type_from_url(url: str) -> Optional[str]:
-    """
-    Get mime type for common image URLs
-    See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
-    Supported by Gemini:
-    application/pdf
-    audio/mpeg
-    audio/mp3
-    audio/wav
-    image/png
-    image/jpeg
-    image/webp
-    text/plain
-    video/mov
-    video/mpeg
-    video/mp4
-    video/mpg
-    video/avi
-    video/wmv
-    video/mpegps
-    video/flv
-    """
-    url = url.lower()
-    # Map file extensions to mime types
-    mime_types = {
-        # Images
-        (".jpg", ".jpeg"): "image/jpeg",
-        (".png",): "image/png",
-        (".webp",): "image/webp",
-        # Videos
-        (".mp4",): "video/mp4",
-        (".mov",): "video/mov",
-        (".mpeg", ".mpg"): "video/mpeg",
-        (".avi",): "video/avi",
-        (".wmv",): "video/wmv",
-        (".mpegps",): "video/mpegps",
-        (".flv",): "video/flv",
-        # Audio
-        (".mp3",): "audio/mp3",
-        (".wav",): "audio/wav",
-        (".mpeg",): "audio/mpeg",
-        # Documents
-        (".pdf",): "application/pdf",
-        (".txt",): "text/plain",
-    }
-    # Check each extension group against the URL
-    for extensions, mime_type in mime_types.items():
-        if any(url.endswith(ext) for ext in extensions):
-            return mime_type
-    return None
def _gemini_convert_messages_with_history( # noqa: PLR0915
messages: List[AllMessageValues],
) -> List[ContentType]:

@@ -179,6 +179,7 @@ class ToolUseBlockStartEvent(TypedDict):
class ContentBlockStartEvent(TypedDict, total=False):
toolUse: Optional[ToolUseBlockStartEvent]
reasoningContent: BedrockConverseReasoningContentBlockDelta
class ContentBlockDeltaEvent(TypedDict, total=False):

@@ -468,6 +468,12 @@ class ChatCompletionThinkingBlock(TypedDict, total=False):
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
class ChatCompletionRedactedThinkingBlock(TypedDict, total=False):
type: Required[Literal["redacted_thinking"]]
data: str
cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
class WebSearchOptionsUserLocationApproximate(TypedDict, total=False):
city: str
"""Free text input for the city of the user, e.g. `San Francisco`."""
@@ -797,7 +803,9 @@ class ChatCompletionResponseMessage(TypedDict, total=False):
function_call: Optional[ChatCompletionToolCallFunctionChunk]
provider_specific_fields: Optional[dict]
reasoning_content: Optional[str]
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ]
class ChatCompletionUsageBlock(TypedDict):

@@ -29,6 +29,7 @@ from .guardrails import GuardrailEventHooks
from .llms.openai import (
Batch,
ChatCompletionAnnotation,
ChatCompletionRedactedThinkingBlock,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
ChatCompletionUsageBlock,
@@ -552,7 +553,9 @@ class Message(OpenAIObject):
function_call: Optional[FunctionCall]
audio: Optional[ChatCompletionAudioResponse] = None
reasoning_content: Optional[str] = None
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ] = None
provider_specific_fields: Optional[Dict[str, Any]] = Field(
default=None, exclude=True
)
@@ -567,7 +570,11 @@
audio: Optional[ChatCompletionAudioResponse] = None,
provider_specific_fields: Optional[Dict[str, Any]] = None,
reasoning_content: Optional[str] = None,
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None,
annotations: Optional[List[ChatCompletionAnnotation]] = None,
**params,
):
@@ -650,7 +657,9 @@
class Delta(OpenAIObject):
reasoning_content: Optional[str] = None
-    thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
+    thinking_blocks: Optional[
+        List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]]
+    ] = None
provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)
def __init__(
@@ -661,7 +670,11 @@
tool_calls=None,
audio: Optional[ChatCompletionAudioResponse] = None,
reasoning_content: Optional[str] = None,
-        thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
+        thinking_blocks: Optional[
+            List[
+                Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
+            ]
+        ] = None,
annotations: Optional[List[ChatCompletionAnnotation]] = None,
**params,
):

@@ -0,0 +1,38 @@
import json
import os
import sys
from unittest.mock import MagicMock
import pytest
from fastapi.testclient import TestClient
sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path
from litellm.llms.anthropic.chat.handler import ModelResponseIterator
def test_redacted_thinking_content_block_delta():
chunk = {
"type": "content_block_start",
"index": 58,
"content_block": {
"type": "redacted_thinking",
"data": "EuoBCoYBGAIiQJ/SxkPAgqxhKok29YrpJHRUJ0OT8ahCHKAwyhmRuUhtdmDX9+mn4gDzKNv3fVpQdB01zEPMzNY3QuTCd+1bdtEqQK6JuKHqdndbwpr81oVWb4wxd1GqF/7Jkw74IlQa27oobX+KuRkopr9Dllt/RDe7Se0sI1IkU7tJIAQCoP46OAwSDF51P09q67xhHlQ3ihoM2aOVlkghq/X0w8NlIjBMNvXYNbjhyrOcIg6kPFn2ed/KK7Cm5prYAtXCwkb4Wr5tUSoSHu9T5hKdJRbr6WsqEc7Lle7FULqMLZGkhqXyc3BA",
},
}
model_response_iterator = ModelResponseIterator(
streaming_response=MagicMock(), sync_stream=False, json_mode=False
)
model_response = model_response_iterator.chunk_parser(chunk=chunk)
print(f"\n\nmodel_response: {model_response}\n\n")
assert model_response.choices[0].delta.thinking_blocks is not None
assert len(model_response.choices[0].delta.thinking_blocks) == 1
print(
f"\n\nmodel_response.choices[0].delta.thinking_blocks[0]: {model_response.choices[0].delta.thinking_blocks[0]}\n\n"
)
assert (
model_response.choices[0].delta.thinking_blocks[0]["type"]
== "redacted_thinking"
)

@@ -56,3 +56,58 @@ def test_calculate_usage():
assert usage.prompt_tokens_details.cached_tokens == 0
assert usage._cache_creation_input_tokens == 12304
assert usage._cache_read_input_tokens == 0
def test_extract_response_content_with_citations():
config = AnthropicConfig()
completion_response = {
"id": "msg_01XrAv7gc5tQNDuoADra7vB4",
"type": "message",
"role": "assistant",
"model": "claude-3-5-sonnet-20241022",
"content": [
{"type": "text", "text": "According to the documents, "},
{
"citations": [
{
"type": "char_location",
"cited_text": "The grass is green. ",
"document_index": 0,
"document_title": "My Document",
"start_char_index": 0,
"end_char_index": 20,
}
],
"type": "text",
"text": "the grass is green",
},
{"type": "text", "text": " and "},
{
"citations": [
{
"type": "char_location",
"cited_text": "The sky is blue.",
"document_index": 0,
"document_title": "My Document",
"start_char_index": 20,
"end_char_index": 36,
}
],
"type": "text",
"text": "the sky is blue",
},
{"type": "text", "text": "."},
],
"stop_reason": "end_turn",
"stop_sequence": None,
"usage": {
"input_tokens": 610,
"cache_creation_input_tokens": 0,
"cache_read_input_tokens": 0,
"output_tokens": 51,
},
}
_, citations, _, _, _ = config.extract_response_content(completion_response)
assert citations is not None

@@ -40,3 +40,22 @@ def test_transform_usage():
)
assert openai_usage._cache_creation_input_tokens == usage["cacheWriteInputTokens"]
assert openai_usage._cache_read_input_tokens == usage["cacheReadInputTokens"]
def test_transform_thinking_blocks_with_redacted_content():
thinking_blocks = [
{
"reasoningText": {
"text": "This is a test",
"signature": "test_signature",
}
},
{
"redactedContent": "This is a redacted content",
},
]
config = AmazonConverseConfig()
transformed_thinking_blocks = config._transform_thinking_blocks(thinking_blocks)
assert len(transformed_thinking_blocks) == 2
assert transformed_thinking_blocks[0]["type"] == "thinking"
assert transformed_thinking_blocks[1]["type"] == "redacted_thinking"

@@ -0,0 +1,22 @@
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path
from unittest.mock import MagicMock, patch
from litellm.llms.bedrock.chat.invoke_handler import AWSEventStreamDecoder
def test_transform_thinking_blocks_with_redacted_content():
thinking_block = {"redactedContent": "This is a redacted content"}
decoder = AWSEventStreamDecoder(model="test")
transformed_thinking_blocks = decoder.translate_thinking_blocks(thinking_block)
assert len(transformed_thinking_blocks) == 1
assert transformed_thinking_blocks[0]["type"] == "redacted_thinking"
assert transformed_thinking_blocks[0]["data"] == "This is a redacted content"

@@ -0,0 +1,45 @@
import json
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path
from litellm.llms.hosted_vllm.chat.transformation import HostedVLLMChatConfig
def test_hosted_vllm_chat_transformation_file_url():
config = HostedVLLMChatConfig()
video_url = "https://example.com/video.mp4"
video_data = f"data:video/mp4;base64,{video_url}"
messages = [
{
"role": "user",
"content": [
{
"type": "file",
"file": {
"file_data": video_data,
},
}
],
}
]
transformed_response = config.transform_request(
model="hosted_vllm/llama-3.1-70b-instruct",
messages=messages,
optional_params={},
litellm_params={},
headers={},
)
assert transformed_response["messages"] == [
{
"role": "user",
"content": [{"type": "video_url", "video_url": {"url": video_data}}],
}
]

File diff suppressed because one or more lines are too long