mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 02:34:29 +00:00
Litellm dev bedrock anthropic 3 7 v2 (#8843)
* feat(bedrock/converse/transformation.py): support claude-3-7-sonnet reasoning_Content transformation Closes https://github.com/BerriAI/litellm/issues/8777 * fix(bedrock/): support returning `reasoning_content` on streaming for claude-3-7 Resolves https://github.com/BerriAI/litellm/issues/8777 * feat(bedrock/): unify converse reasoning content blocks for consistency across anthropic and bedrock * fix(anthropic/chat/transformation.py): handle deepseek-style 'reasoning_content' extraction within transformation.py simpler logic * feat(bedrock/): fix streaming to return blocks in consistent format * fix: fix linting error * test: fix test * feat(factory.py): fix bedrock thinking block translation on tool calling allows passing the thinking blocks back to bedrock for tool calling * fix(types/utils.py): don't exclude provider_specific_fields on model dump ensures consistent responses * fix: fix linting errors * fix(convert_dict_to_response.py): pass reasoning_content on root * fix: test * fix(streaming_handler.py): add helper util for setting model id * fix(streaming_handler.py): fix setting model id on model response stream chunk * fix(streaming_handler.py): fix linting error * fix(streaming_handler.py): fix linting error * fix(types/utils.py): add provider_specific_fields to model stream response * fix(streaming_handler.py): copy provider specific fields and add them to the root of the streaming response * fix(streaming_handler.py): fix check * fix: fix test * fix(types/utils.py): ensure messages content is always openai compatible * fix(types/utils.py): fix delta object to always be openai compatible only introduce new params if variable exists * test: fix bedrock nova tests * test: skip flaky test * test: skip flaky test in ci/cd
This commit is contained in:
parent
40a3af7d61
commit
ab7c4d1a0e
20 changed files with 447 additions and 149 deletions
|
@ -473,6 +473,7 @@ def convert_to_model_response_object( # noqa: PLR0915
|
|||
tool_calls=tool_calls,
|
||||
audio=choice["message"].get("audio", None),
|
||||
provider_specific_fields=provider_specific_fields,
|
||||
reasoning_content=reasoning_content,
|
||||
)
|
||||
finish_reason = choice.get("finish_reason", None)
|
||||
if finish_reason is None:
|
||||
|
|
|
@ -2151,6 +2151,10 @@ from email.message import Message
|
|||
|
||||
import httpx
|
||||
|
||||
from litellm.types.llms.bedrock import (
|
||||
BedrockConverseReasoningContentBlock,
|
||||
BedrockConverseReasoningTextBlock,
|
||||
)
|
||||
from litellm.types.llms.bedrock import ContentBlock as BedrockContentBlock
|
||||
from litellm.types.llms.bedrock import DocumentBlock as BedrockDocumentBlock
|
||||
from litellm.types.llms.bedrock import ImageBlock as BedrockImageBlock
|
||||
|
@ -2963,6 +2967,28 @@ class BedrockConverseMessagesProcessor:
|
|||
|
||||
return contents
|
||||
|
||||
@staticmethod
|
||||
def translate_thinking_blocks_to_reasoning_content_blocks(
|
||||
thinking_blocks: List[ChatCompletionThinkingBlock],
|
||||
) -> List[BedrockContentBlock]:
|
||||
reasoning_content_blocks: List[BedrockContentBlock] = []
|
||||
for thinking_block in thinking_blocks:
|
||||
reasoning_text = thinking_block.get("thinking")
|
||||
reasoning_signature = thinking_block.get("signature_delta")
|
||||
text_block = BedrockConverseReasoningTextBlock(
|
||||
text=reasoning_text or "",
|
||||
)
|
||||
if reasoning_signature is not None:
|
||||
text_block["signature"] = reasoning_signature
|
||||
reasoning_content_block = BedrockConverseReasoningContentBlock(
|
||||
reasoningText=text_block,
|
||||
)
|
||||
bedrock_content_block = BedrockContentBlock(
|
||||
reasoningContent=reasoning_content_block
|
||||
)
|
||||
reasoning_content_blocks.append(bedrock_content_block)
|
||||
return reasoning_content_blocks
|
||||
|
||||
|
||||
def _bedrock_converse_messages_pt( # noqa: PLR0915
|
||||
messages: List,
|
||||
|
@ -3109,11 +3135,23 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915
|
|||
assistant_content: List[BedrockContentBlock] = []
|
||||
## MERGE CONSECUTIVE ASSISTANT CONTENT ##
|
||||
while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
|
||||
|
||||
assistant_message_block = get_assistant_message_block_or_continue_message(
|
||||
message=messages[msg_i],
|
||||
assistant_continue_message=assistant_continue_message,
|
||||
)
|
||||
_assistant_content = assistant_message_block.get("content", None)
|
||||
thinking_blocks = cast(
|
||||
Optional[List[ChatCompletionThinkingBlock]],
|
||||
assistant_message_block.get("thinking_blocks"),
|
||||
)
|
||||
|
||||
if thinking_blocks is not None:
|
||||
assistant_content.extend(
|
||||
BedrockConverseMessagesProcessor.translate_thinking_blocks_to_reasoning_content_blocks(
|
||||
thinking_blocks
|
||||
)
|
||||
)
|
||||
|
||||
if _assistant_content is not None and isinstance(_assistant_content, list):
|
||||
assistants_parts: List[BedrockContentBlock] = []
|
||||
|
|
|
@ -5,7 +5,7 @@ import threading
|
|||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
from typing import Any, Callable, Dict, List, Optional, cast
|
||||
from typing import Any, Callable, Dict, List, Optional, Union, cast
|
||||
|
||||
import httpx
|
||||
from pydantic import BaseModel
|
||||
|
@ -14,6 +14,7 @@ import litellm
|
|||
from litellm import verbose_logger
|
||||
from litellm.litellm_core_utils.redact_messages import LiteLLMLoggingObject
|
||||
from litellm.litellm_core_utils.thread_pool_executor import executor
|
||||
from litellm.types.llms.openai import ChatCompletionChunk
|
||||
from litellm.types.utils import Delta
|
||||
from litellm.types.utils import GenericStreamingChunk as GChunk
|
||||
from litellm.types.utils import (
|
||||
|
@ -110,7 +111,7 @@ class CustomStreamWrapper:
|
|||
) # GUARANTEE OPENAI HEADERS IN RESPONSE
|
||||
|
||||
self._response_headers = _response_headers
|
||||
self.response_id = None
|
||||
self.response_id: Optional[str] = None
|
||||
self.logging_loop = None
|
||||
self.rules = Rules()
|
||||
self.stream_options = stream_options or getattr(
|
||||
|
@ -721,6 +722,39 @@ class CustomStreamWrapper:
|
|||
is_empty = False
|
||||
return is_empty
|
||||
|
||||
def set_model_id(
|
||||
self, id: str, model_response: ModelResponseStream
|
||||
) -> ModelResponseStream:
|
||||
"""
|
||||
Set the model id and response id to the given id.
|
||||
|
||||
Ensure model id is always the same across all chunks.
|
||||
|
||||
If first chunk sent + id set, use that id for all chunks.
|
||||
"""
|
||||
if self.response_id is None:
|
||||
self.response_id = id
|
||||
if self.response_id is not None and isinstance(self.response_id, str):
|
||||
model_response.id = self.response_id
|
||||
return model_response
|
||||
|
||||
def copy_model_response_level_provider_specific_fields(
|
||||
self,
|
||||
original_chunk: Union[ModelResponseStream, ChatCompletionChunk],
|
||||
model_response: ModelResponseStream,
|
||||
) -> ModelResponseStream:
|
||||
"""
|
||||
Copy provider_specific_fields from original_chunk to model_response.
|
||||
"""
|
||||
provider_specific_fields = getattr(
|
||||
original_chunk, "provider_specific_fields", None
|
||||
)
|
||||
if provider_specific_fields is not None:
|
||||
model_response.provider_specific_fields = provider_specific_fields
|
||||
for k, v in provider_specific_fields.items():
|
||||
setattr(model_response, k, v)
|
||||
return model_response
|
||||
|
||||
def return_processed_chunk_logic( # noqa
|
||||
self,
|
||||
completion_obj: Dict[str, Any],
|
||||
|
@ -747,6 +781,10 @@ class CustomStreamWrapper:
|
|||
and completion_obj["function_call"] is not None
|
||||
)
|
||||
or (model_response.choices[0].delta.provider_specific_fields is not None)
|
||||
or (
|
||||
"provider_specific_fields" in model_response
|
||||
and model_response.choices[0].delta.provider_specific_fields is not None
|
||||
)
|
||||
or (
|
||||
"provider_specific_fields" in response_obj
|
||||
and response_obj["provider_specific_fields"] is not None
|
||||
|
@ -763,8 +801,6 @@ class CustomStreamWrapper:
|
|||
## check if openai/azure chunk
|
||||
original_chunk = response_obj.get("original_chunk", None)
|
||||
if original_chunk:
|
||||
model_response.id = original_chunk.id
|
||||
self.response_id = original_chunk.id
|
||||
if len(original_chunk.choices) > 0:
|
||||
choices = []
|
||||
for choice in original_chunk.choices:
|
||||
|
@ -798,9 +834,10 @@ class CustomStreamWrapper:
|
|||
model_response.choices[0].delta, "role"
|
||||
):
|
||||
_initial_delta = model_response.choices[0].delta.model_dump()
|
||||
|
||||
_initial_delta.pop("role", None)
|
||||
model_response.choices[0].delta = Delta(**_initial_delta)
|
||||
print_verbose(
|
||||
verbose_logger.debug(
|
||||
f"model_response.choices[0].delta: {model_response.choices[0].delta}"
|
||||
)
|
||||
else:
|
||||
|
@ -870,7 +907,7 @@ class CustomStreamWrapper:
|
|||
self.chunks.append(model_response)
|
||||
return
|
||||
|
||||
def chunk_creator(self, chunk): # type: ignore # noqa: PLR0915
|
||||
def chunk_creator(self, chunk: Any): # type: ignore # noqa: PLR0915
|
||||
model_response = self.model_response_creator()
|
||||
response_obj: Dict[str, Any] = {}
|
||||
|
||||
|
@ -886,16 +923,13 @@ class CustomStreamWrapper:
|
|||
) # check if chunk is a generic streaming chunk
|
||||
) or (
|
||||
self.custom_llm_provider
|
||||
and (
|
||||
self.custom_llm_provider == "anthropic"
|
||||
or self.custom_llm_provider in litellm._custom_providers
|
||||
)
|
||||
and self.custom_llm_provider in litellm._custom_providers
|
||||
):
|
||||
|
||||
if self.received_finish_reason is not None:
|
||||
if "provider_specific_fields" not in chunk:
|
||||
raise StopIteration
|
||||
anthropic_response_obj: GChunk = chunk
|
||||
anthropic_response_obj: GChunk = cast(GChunk, chunk)
|
||||
completion_obj["content"] = anthropic_response_obj["text"]
|
||||
if anthropic_response_obj["is_finished"]:
|
||||
self.received_finish_reason = anthropic_response_obj[
|
||||
|
@ -927,7 +961,7 @@ class CustomStreamWrapper:
|
|||
].items():
|
||||
setattr(model_response, key, value)
|
||||
|
||||
response_obj = anthropic_response_obj
|
||||
response_obj = cast(Dict[str, Any], anthropic_response_obj)
|
||||
elif self.model == "replicate" or self.custom_llm_provider == "replicate":
|
||||
response_obj = self.handle_replicate_chunk(chunk)
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
|
@ -989,6 +1023,7 @@ class CustomStreamWrapper:
|
|||
try:
|
||||
completion_obj["content"] = chunk.text
|
||||
except Exception as e:
|
||||
original_exception = e
|
||||
if "Part has no text." in str(e):
|
||||
## check for function calling
|
||||
function_call = (
|
||||
|
@ -1030,7 +1065,7 @@ class CustomStreamWrapper:
|
|||
_model_response.choices = [_streaming_response]
|
||||
response_obj = {"original_chunk": _model_response}
|
||||
else:
|
||||
raise e
|
||||
raise original_exception
|
||||
if (
|
||||
hasattr(chunk.candidates[0], "finish_reason")
|
||||
and chunk.candidates[0].finish_reason.name
|
||||
|
@ -1093,8 +1128,9 @@ class CustomStreamWrapper:
|
|||
total_tokens=response_obj["usage"].total_tokens,
|
||||
)
|
||||
elif self.custom_llm_provider == "text-completion-codestral":
|
||||
response_obj = litellm.CodestralTextCompletionConfig()._chunk_parser(
|
||||
chunk
|
||||
response_obj = cast(
|
||||
Dict[str, Any],
|
||||
litellm.CodestralTextCompletionConfig()._chunk_parser(chunk),
|
||||
)
|
||||
completion_obj["content"] = response_obj["text"]
|
||||
print_verbose(f"completion obj content: {completion_obj['content']}")
|
||||
|
@ -1156,8 +1192,9 @@ class CustomStreamWrapper:
|
|||
self.received_finish_reason = response_obj["finish_reason"]
|
||||
if response_obj.get("original_chunk", None) is not None:
|
||||
if hasattr(response_obj["original_chunk"], "id"):
|
||||
model_response.id = response_obj["original_chunk"].id
|
||||
self.response_id = model_response.id
|
||||
model_response = self.set_model_id(
|
||||
response_obj["original_chunk"].id, model_response
|
||||
)
|
||||
if hasattr(response_obj["original_chunk"], "system_fingerprint"):
|
||||
model_response.system_fingerprint = response_obj[
|
||||
"original_chunk"
|
||||
|
@ -1206,8 +1243,16 @@ class CustomStreamWrapper:
|
|||
): # function / tool calling branch - only set for openai/azure compatible endpoints
|
||||
# enter this branch when no content has been passed in response
|
||||
original_chunk = response_obj.get("original_chunk", None)
|
||||
model_response.id = original_chunk.id
|
||||
self.response_id = original_chunk.id
|
||||
if hasattr(original_chunk, "id"):
|
||||
model_response = self.set_model_id(
|
||||
original_chunk.id, model_response
|
||||
)
|
||||
if hasattr(original_chunk, "provider_specific_fields"):
|
||||
model_response = (
|
||||
self.copy_model_response_level_provider_specific_fields(
|
||||
original_chunk, model_response
|
||||
)
|
||||
)
|
||||
if original_chunk.choices and len(original_chunk.choices) > 0:
|
||||
delta = original_chunk.choices[0].delta
|
||||
if delta is not None and (
|
||||
|
|
|
@ -34,7 +34,12 @@ from litellm.types.llms.openai import (
|
|||
ChatCompletionToolCallChunk,
|
||||
ChatCompletionUsageBlock,
|
||||
)
|
||||
from litellm.types.utils import GenericStreamingChunk
|
||||
from litellm.types.utils import (
|
||||
Delta,
|
||||
GenericStreamingChunk,
|
||||
ModelResponseStream,
|
||||
StreamingChoices,
|
||||
)
|
||||
from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
|
||||
|
||||
from ...base import BaseLLM
|
||||
|
@ -507,7 +512,12 @@ class ModelResponseIterator:
|
|||
|
||||
return usage_block
|
||||
|
||||
def _content_block_delta_helper(self, chunk: dict):
|
||||
def _content_block_delta_helper(self, chunk: dict) -> Tuple[
|
||||
str,
|
||||
Optional[ChatCompletionToolCallChunk],
|
||||
List[ChatCompletionThinkingBlock],
|
||||
Dict[str, Any],
|
||||
]:
|
||||
"""
|
||||
Helper function to handle the content block delta
|
||||
"""
|
||||
|
@ -516,6 +526,7 @@ class ModelResponseIterator:
|
|||
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
||||
provider_specific_fields = {}
|
||||
content_block = ContentBlockDelta(**chunk) # type: ignore
|
||||
thinking_blocks: List[ChatCompletionThinkingBlock] = []
|
||||
self.content_blocks.append(content_block)
|
||||
if "text" in content_block["delta"]:
|
||||
text = content_block["delta"]["text"]
|
||||
|
@ -535,25 +546,41 @@ class ModelResponseIterator:
|
|||
"thinking" in content_block["delta"]
|
||||
or "signature_delta" == content_block["delta"]
|
||||
):
|
||||
provider_specific_fields["thinking_blocks"] = [
|
||||
thinking_blocks = [
|
||||
ChatCompletionThinkingBlock(
|
||||
type="thinking",
|
||||
thinking=content_block["delta"].get("thinking"),
|
||||
signature_delta=content_block["delta"].get("signature"),
|
||||
)
|
||||
]
|
||||
return text, tool_use, provider_specific_fields
|
||||
provider_specific_fields["thinking_blocks"] = thinking_blocks
|
||||
return text, tool_use, thinking_blocks, provider_specific_fields
|
||||
|
||||
def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
|
||||
def _handle_reasoning_content(
|
||||
self, thinking_blocks: List[ChatCompletionThinkingBlock]
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Handle the reasoning content
|
||||
"""
|
||||
reasoning_content = None
|
||||
for block in thinking_blocks:
|
||||
if reasoning_content is None:
|
||||
reasoning_content = ""
|
||||
if "thinking" in block:
|
||||
reasoning_content += block["thinking"]
|
||||
return reasoning_content
|
||||
|
||||
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
|
||||
try:
|
||||
type_chunk = chunk.get("type", "") or ""
|
||||
|
||||
text = ""
|
||||
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
||||
is_finished = False
|
||||
finish_reason = ""
|
||||
usage: Optional[ChatCompletionUsageBlock] = None
|
||||
provider_specific_fields: Dict[str, Any] = {}
|
||||
reasoning_content: Optional[str] = None
|
||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
||||
|
||||
index = int(chunk.get("index", 0))
|
||||
if type_chunk == "content_block_delta":
|
||||
|
@ -561,9 +588,13 @@ class ModelResponseIterator:
|
|||
Anthropic content chunk
|
||||
chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
|
||||
"""
|
||||
text, tool_use, provider_specific_fields = (
|
||||
text, tool_use, thinking_blocks, provider_specific_fields = (
|
||||
self._content_block_delta_helper(chunk=chunk)
|
||||
)
|
||||
if thinking_blocks:
|
||||
reasoning_content = self._handle_reasoning_content(
|
||||
thinking_blocks=thinking_blocks
|
||||
)
|
||||
elif type_chunk == "content_block_start":
|
||||
"""
|
||||
event: content_block_start
|
||||
|
@ -610,7 +641,6 @@ class ModelResponseIterator:
|
|||
or "stop"
|
||||
)
|
||||
usage = self._handle_usage(anthropic_usage_chunk=message_delta["usage"])
|
||||
is_finished = True
|
||||
elif type_chunk == "message_start":
|
||||
"""
|
||||
Anthropic
|
||||
|
@ -649,16 +679,27 @@ class ModelResponseIterator:
|
|||
|
||||
text, tool_use = self._handle_json_mode_chunk(text=text, tool_use=tool_use)
|
||||
|
||||
returned_chunk = GenericStreamingChunk(
|
||||
text=text,
|
||||
tool_use=tool_use,
|
||||
is_finished=is_finished,
|
||||
finish_reason=finish_reason,
|
||||
returned_chunk = ModelResponseStream(
|
||||
choices=[
|
||||
StreamingChoices(
|
||||
index=index,
|
||||
delta=Delta(
|
||||
content=text,
|
||||
tool_calls=[tool_use] if tool_use is not None else None,
|
||||
provider_specific_fields=(
|
||||
provider_specific_fields
|
||||
if provider_specific_fields
|
||||
else None
|
||||
),
|
||||
thinking_blocks=(
|
||||
thinking_blocks if thinking_blocks else None
|
||||
),
|
||||
reasoning_content=reasoning_content,
|
||||
),
|
||||
finish_reason=finish_reason,
|
||||
)
|
||||
],
|
||||
usage=usage,
|
||||
index=index,
|
||||
provider_specific_fields=(
|
||||
provider_specific_fields if provider_specific_fields else None
|
||||
),
|
||||
)
|
||||
|
||||
return returned_chunk
|
||||
|
@ -769,7 +810,9 @@ class ModelResponseIterator:
|
|||
except ValueError as e:
|
||||
raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
|
||||
|
||||
def convert_str_chunk_to_generic_chunk(self, chunk: str) -> GenericStreamingChunk:
|
||||
def convert_str_chunk_to_generic_chunk(
|
||||
self, chunk: str
|
||||
) -> Union[GenericStreamingChunk, ModelResponseStream]:
|
||||
"""
|
||||
Convert a string chunk to a GenericStreamingChunk
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@ from litellm.types.llms.openai import (
|
|||
AllMessageValues,
|
||||
ChatCompletionCachedContent,
|
||||
ChatCompletionSystemMessage,
|
||||
ChatCompletionThinkingBlock,
|
||||
ChatCompletionToolCallChunk,
|
||||
ChatCompletionToolCallFunctionChunk,
|
||||
ChatCompletionToolParam,
|
||||
|
@ -591,12 +592,14 @@ class AnthropicConfig(BaseConfig):
|
|||
def extract_response_content(self, completion_response: dict) -> Tuple[
|
||||
str,
|
||||
Optional[List[Any]],
|
||||
Optional[List[Dict[str, Any]]],
|
||||
Optional[List[ChatCompletionThinkingBlock]],
|
||||
Optional[str],
|
||||
List[ChatCompletionToolCallChunk],
|
||||
]:
|
||||
text_content = ""
|
||||
citations: Optional[List[Any]] = None
|
||||
thinking_blocks: Optional[List[Dict[str, Any]]] = None
|
||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
tool_calls: List[ChatCompletionToolCallChunk] = []
|
||||
for idx, content in enumerate(completion_response["content"]):
|
||||
if content["type"] == "text":
|
||||
|
@ -622,8 +625,13 @@ class AnthropicConfig(BaseConfig):
|
|||
if content.get("thinking", None) is not None:
|
||||
if thinking_blocks is None:
|
||||
thinking_blocks = []
|
||||
thinking_blocks.append(content)
|
||||
return text_content, citations, thinking_blocks, tool_calls
|
||||
thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
|
||||
if thinking_blocks is not None:
|
||||
reasoning_content = ""
|
||||
for block in thinking_blocks:
|
||||
if "thinking" in block:
|
||||
reasoning_content += block["thinking"]
|
||||
return text_content, citations, thinking_blocks, reasoning_content, tool_calls
|
||||
|
||||
def transform_response(
|
||||
self,
|
||||
|
@ -673,10 +681,11 @@ class AnthropicConfig(BaseConfig):
|
|||
else:
|
||||
text_content = ""
|
||||
citations: Optional[List[Any]] = None
|
||||
thinking_blocks: Optional[List[Dict[str, Any]]] = None
|
||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
tool_calls: List[ChatCompletionToolCallChunk] = []
|
||||
|
||||
text_content, citations, thinking_blocks, tool_calls = (
|
||||
text_content, citations, thinking_blocks, reasoning_content, tool_calls = (
|
||||
self.extract_response_content(completion_response=completion_response)
|
||||
)
|
||||
|
||||
|
@ -687,6 +696,8 @@ class AnthropicConfig(BaseConfig):
|
|||
"citations": citations,
|
||||
"thinking_blocks": thinking_blocks,
|
||||
},
|
||||
thinking_blocks=thinking_blocks,
|
||||
reasoning_content=reasoning_content,
|
||||
)
|
||||
|
||||
## HANDLE JSON MODE - anthropic returns single function call
|
||||
|
|
|
@ -23,6 +23,7 @@ from litellm.types.llms.openai import (
|
|||
AllMessageValues,
|
||||
ChatCompletionResponseMessage,
|
||||
ChatCompletionSystemMessage,
|
||||
ChatCompletionThinkingBlock,
|
||||
ChatCompletionToolCallChunk,
|
||||
ChatCompletionToolCallFunctionChunk,
|
||||
ChatCompletionToolParam,
|
||||
|
@ -545,6 +546,37 @@ class AmazonConverseConfig(BaseConfig):
|
|||
encoding=encoding,
|
||||
)
|
||||
|
||||
def _transform_reasoning_content(
|
||||
self, reasoning_content_blocks: List[BedrockConverseReasoningContentBlock]
|
||||
) -> str:
|
||||
"""
|
||||
Extract the reasoning text from the reasoning content blocks
|
||||
|
||||
Ensures deepseek reasoning content compatible output.
|
||||
"""
|
||||
reasoning_content_str = ""
|
||||
for block in reasoning_content_blocks:
|
||||
if "reasoningText" in block:
|
||||
reasoning_content_str += block["reasoningText"]["text"]
|
||||
return reasoning_content_str
|
||||
|
||||
def _transform_thinking_blocks(
|
||||
self, thinking_blocks: List[BedrockConverseReasoningContentBlock]
|
||||
) -> List[ChatCompletionThinkingBlock]:
|
||||
"""Return a consistent format for thinking blocks between Anthropic and Bedrock."""
|
||||
thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
|
||||
for block in thinking_blocks:
|
||||
if "reasoningText" in block:
|
||||
_thinking_block = ChatCompletionThinkingBlock(type="thinking")
|
||||
_text = block["reasoningText"].get("text")
|
||||
_signature = block["reasoningText"].get("signature")
|
||||
if _text is not None:
|
||||
_thinking_block["thinking"] = _text
|
||||
if _signature is not None:
|
||||
_thinking_block["signature_delta"] = _signature
|
||||
thinking_blocks_list.append(_thinking_block)
|
||||
return thinking_blocks_list
|
||||
|
||||
def _transform_response(
|
||||
self,
|
||||
model: str,
|
||||
|
@ -618,6 +650,10 @@ class AmazonConverseConfig(BaseConfig):
|
|||
chat_completion_message: ChatCompletionResponseMessage = {"role": "assistant"}
|
||||
content_str = ""
|
||||
tools: List[ChatCompletionToolCallChunk] = []
|
||||
reasoningContentBlocks: Optional[List[BedrockConverseReasoningContentBlock]] = (
|
||||
None
|
||||
)
|
||||
|
||||
if message is not None:
|
||||
for idx, content in enumerate(message["content"]):
|
||||
"""
|
||||
|
@ -644,8 +680,22 @@ class AmazonConverseConfig(BaseConfig):
|
|||
index=idx,
|
||||
)
|
||||
tools.append(_tool_response_chunk)
|
||||
chat_completion_message["content"] = content_str
|
||||
if "reasoningContent" in content:
|
||||
if reasoningContentBlocks is None:
|
||||
reasoningContentBlocks = []
|
||||
reasoningContentBlocks.append(content["reasoningContent"])
|
||||
|
||||
if reasoningContentBlocks is not None:
|
||||
chat_completion_message["provider_specific_fields"] = {
|
||||
"reasoningContentBlocks": reasoningContentBlocks,
|
||||
}
|
||||
chat_completion_message["reasoning_content"] = (
|
||||
self._transform_reasoning_content(reasoningContentBlocks)
|
||||
)
|
||||
chat_completion_message["thinking_blocks"] = (
|
||||
self._transform_thinking_blocks(reasoningContentBlocks)
|
||||
)
|
||||
chat_completion_message["content"] = content_str
|
||||
if json_mode is True and tools is not None and len(tools) == 1:
|
||||
# to support 'json_schema' logic on bedrock models
|
||||
json_mode_content_str: Optional[str] = tools[0]["function"].get("arguments")
|
||||
|
|
|
@ -26,7 +26,6 @@ import httpx # type: ignore
|
|||
|
||||
import litellm
|
||||
from litellm import verbose_logger
|
||||
from litellm._logging import print_verbose
|
||||
from litellm.caching.caching import InMemoryCache
|
||||
from litellm.litellm_core_utils.core_helpers import map_finish_reason
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging
|
||||
|
@ -51,13 +50,19 @@ from litellm.llms.custom_httpx.http_handler import (
|
|||
)
|
||||
from litellm.types.llms.bedrock import *
|
||||
from litellm.types.llms.openai import (
|
||||
ChatCompletionThinkingBlock,
|
||||
ChatCompletionToolCallChunk,
|
||||
ChatCompletionToolCallFunctionChunk,
|
||||
ChatCompletionUsageBlock,
|
||||
)
|
||||
from litellm.types.utils import ChatCompletionMessageToolCall, Choices
|
||||
from litellm.types.utils import ChatCompletionMessageToolCall, Choices, Delta
|
||||
from litellm.types.utils import GenericStreamingChunk as GChunk
|
||||
from litellm.types.utils import ModelResponse, ModelResponseStream, Usage
|
||||
from litellm.types.utils import (
|
||||
ModelResponse,
|
||||
ModelResponseStream,
|
||||
StreamingChoices,
|
||||
Usage,
|
||||
)
|
||||
from litellm.utils import CustomStreamWrapper, get_secret
|
||||
|
||||
from ..base_aws_llm import BaseAWSLLM
|
||||
|
@ -212,7 +217,6 @@ async def make_call(
|
|||
api_key="",
|
||||
data=data,
|
||||
messages=messages,
|
||||
print_verbose=print_verbose,
|
||||
encoding=litellm.encoding,
|
||||
) # type: ignore
|
||||
completion_stream: Any = MockResponseIterator(
|
||||
|
@ -298,7 +302,6 @@ def make_sync_call(
|
|||
api_key="",
|
||||
data=data,
|
||||
messages=messages,
|
||||
print_verbose=print_verbose,
|
||||
encoding=litellm.encoding,
|
||||
) # type: ignore
|
||||
completion_stream: Any = MockResponseIterator(
|
||||
|
@ -525,7 +528,7 @@ class BedrockLLM(BaseAWSLLM):
|
|||
].message.tool_calls:
|
||||
_tool_call = {**tool_call.dict(), "index": 0}
|
||||
_tool_calls.append(_tool_call)
|
||||
delta_obj = litellm.utils.Delta(
|
||||
delta_obj = Delta(
|
||||
content=getattr(
|
||||
model_response.choices[0].message, "content", None
|
||||
),
|
||||
|
@ -1258,14 +1261,37 @@ class AWSEventStreamDecoder:
|
|||
return True
|
||||
return False
|
||||
|
||||
def converse_chunk_parser(self, chunk_data: dict) -> GChunk:
|
||||
def extract_reasoning_content_str(
|
||||
self, reasoning_content_block: BedrockConverseReasoningContentBlockDelta
|
||||
) -> Optional[str]:
|
||||
if "text" in reasoning_content_block:
|
||||
return reasoning_content_block["text"]
|
||||
return None
|
||||
|
||||
def translate_thinking_blocks(
|
||||
self, thinking_block: BedrockConverseReasoningContentBlockDelta
|
||||
) -> Optional[List[ChatCompletionThinkingBlock]]:
|
||||
"""
|
||||
Translate the thinking blocks to a string
|
||||
"""
|
||||
|
||||
thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
|
||||
_thinking_block = ChatCompletionThinkingBlock(type="thinking")
|
||||
if "text" in thinking_block:
|
||||
_thinking_block["thinking"] = thinking_block["text"]
|
||||
thinking_blocks_list.append(_thinking_block)
|
||||
return thinking_blocks_list
|
||||
|
||||
def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
|
||||
try:
|
||||
verbose_logger.debug("\n\nRaw Chunk: {}\n\n".format(chunk_data))
|
||||
text = ""
|
||||
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
||||
is_finished = False
|
||||
finish_reason = ""
|
||||
usage: Optional[ChatCompletionUsageBlock] = None
|
||||
provider_specific_fields: dict = {}
|
||||
reasoning_content: Optional[str] = None
|
||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
||||
|
||||
index = int(chunk_data.get("contentBlockIndex", 0))
|
||||
if "start" in chunk_data:
|
||||
|
@ -1305,6 +1331,16 @@ class AWSEventStreamDecoder:
|
|||
},
|
||||
"index": index,
|
||||
}
|
||||
elif "reasoningContent" in delta_obj:
|
||||
provider_specific_fields = {
|
||||
"reasoningContent": delta_obj["reasoningContent"],
|
||||
}
|
||||
reasoning_content = self.extract_reasoning_content_str(
|
||||
delta_obj["reasoningContent"]
|
||||
)
|
||||
thinking_blocks = self.translate_thinking_blocks(
|
||||
delta_obj["reasoningContent"]
|
||||
)
|
||||
elif (
|
||||
"contentBlockIndex" in chunk_data
|
||||
): # stop block, no 'start' or 'delta' object
|
||||
|
@ -1321,7 +1357,6 @@ class AWSEventStreamDecoder:
|
|||
}
|
||||
elif "stopReason" in chunk_data:
|
||||
finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
|
||||
is_finished = True
|
||||
elif "usage" in chunk_data:
|
||||
usage = ChatCompletionUsageBlock(
|
||||
prompt_tokens=chunk_data.get("inputTokens", 0),
|
||||
|
@ -1329,18 +1364,33 @@ class AWSEventStreamDecoder:
|
|||
total_tokens=chunk_data.get("totalTokens", 0),
|
||||
)
|
||||
|
||||
response = GChunk(
|
||||
text=text,
|
||||
tool_use=tool_use,
|
||||
is_finished=is_finished,
|
||||
finish_reason=finish_reason,
|
||||
usage=usage,
|
||||
index=index,
|
||||
)
|
||||
|
||||
model_response_provider_specific_fields = {}
|
||||
if "trace" in chunk_data:
|
||||
trace = chunk_data.get("trace")
|
||||
response["provider_specific_fields"] = {"trace": trace}
|
||||
model_response_provider_specific_fields["trace"] = trace
|
||||
response = ModelResponseStream(
|
||||
choices=[
|
||||
StreamingChoices(
|
||||
finish_reason=finish_reason,
|
||||
index=index,
|
||||
delta=Delta(
|
||||
content=text,
|
||||
role="assistant",
|
||||
tool_calls=[tool_use] if tool_use else None,
|
||||
provider_specific_fields=(
|
||||
provider_specific_fields
|
||||
if provider_specific_fields
|
||||
else None
|
||||
),
|
||||
thinking_blocks=thinking_blocks,
|
||||
reasoning_content=reasoning_content,
|
||||
),
|
||||
)
|
||||
],
|
||||
usage=usage,
|
||||
provider_specific_fields=model_response_provider_specific_fields,
|
||||
)
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
raise Exception("Received streaming error - {}".format(str(e)))
|
||||
|
@ -1486,7 +1536,7 @@ class AmazonAnthropicClaudeStreamDecoder(AWSEventStreamDecoder):
|
|||
sync_stream=sync_stream,
|
||||
)
|
||||
|
||||
def _chunk_parser(self, chunk_data: dict) -> GChunk:
|
||||
def _chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
|
||||
return self.anthropic_model_response_iterator.chunk_parser(chunk=chunk_data)
|
||||
|
||||
|
||||
|
|
|
@ -1,10 +1,6 @@
|
|||
model_list:
|
||||
- model_name: claude-3.5
|
||||
- model_name: claude-3.7
|
||||
litellm_params:
|
||||
<<<<<<< HEAD
|
||||
model: claude-3-5-sonnet-latest
|
||||
api_key: os.environ/ANTHROPIC_API_KEY
|
||||
=======
|
||||
model: openai/gpt-3.5-turbo
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
api_base: http://0.0.0.0:8090
|
||||
|
@ -20,5 +16,4 @@ model_list:
|
|||
api_key: os.environ/COHERE_API_KEY
|
||||
|
||||
litellm_settings:
|
||||
callbacks: ["langfuse"]
|
||||
>>>>>>> f86a609ea (fix(get_litellm_params.py): handle no-log being passed in via kwargs)
|
||||
callbacks: ["langfuse"]
|
|
@ -66,6 +66,22 @@ class ToolUseBlock(TypedDict):
|
|||
toolUseId: str
|
||||
|
||||
|
||||
class BedrockConverseReasoningTextBlock(TypedDict, total=False):
|
||||
text: Required[str]
|
||||
signature: str
|
||||
|
||||
|
||||
class BedrockConverseReasoningContentBlock(TypedDict, total=False):
|
||||
reasoningText: BedrockConverseReasoningTextBlock
|
||||
redactedContent: str
|
||||
|
||||
|
||||
class BedrockConverseReasoningContentBlockDelta(TypedDict, total=False):
|
||||
signature: str
|
||||
redactedContent: str
|
||||
text: str
|
||||
|
||||
|
||||
class ContentBlock(TypedDict, total=False):
|
||||
text: str
|
||||
image: ImageBlock
|
||||
|
@ -73,6 +89,7 @@ class ContentBlock(TypedDict, total=False):
|
|||
toolResult: ToolResultBlock
|
||||
toolUse: ToolUseBlock
|
||||
cachePoint: CachePointBlock
|
||||
reasoningContent: BedrockConverseReasoningContentBlock
|
||||
|
||||
|
||||
class MessageBlock(TypedDict):
|
||||
|
@ -167,6 +184,7 @@ class ContentBlockDeltaEvent(TypedDict, total=False):
|
|||
|
||||
text: str
|
||||
toolUse: ToolBlockDeltaEvent
|
||||
reasoningContent: BedrockConverseReasoningContentBlockDelta
|
||||
|
||||
|
||||
class CommonRequestObject(
|
||||
|
|
|
@ -596,6 +596,9 @@ class ChatCompletionResponseMessage(TypedDict, total=False):
|
|||
tool_calls: Optional[List[ChatCompletionToolCallChunk]]
|
||||
role: Literal["assistant"]
|
||||
function_call: Optional[ChatCompletionToolCallFunctionChunk]
|
||||
provider_specific_fields: Optional[dict]
|
||||
reasoning_content: Optional[str]
|
||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]]
|
||||
|
||||
|
||||
class ChatCompletionUsageBlock(TypedDict):
|
||||
|
|
|
@ -24,6 +24,7 @@ from typing_extensions import Callable, Dict, Required, TypedDict, override
|
|||
from ..litellm_core_utils.core_helpers import map_finish_reason
|
||||
from .guardrails import GuardrailEventHooks
|
||||
from .llms.openai import (
|
||||
ChatCompletionThinkingBlock,
|
||||
ChatCompletionToolCallChunk,
|
||||
ChatCompletionUsageBlock,
|
||||
OpenAIChatCompletionChunk,
|
||||
|
@ -457,29 +458,6 @@ Reference:
|
|||
ChatCompletionMessage(content='This is a test', role='assistant', function_call=None, tool_calls=None))
|
||||
"""
|
||||
|
||||
REASONING_CONTENT_COMPATIBLE_PARAMS = [
|
||||
"thinking_blocks",
|
||||
"reasoning_content",
|
||||
]
|
||||
|
||||
|
||||
def map_reasoning_content(provider_specific_fields: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Extract reasoning_content from provider_specific_fields
|
||||
"""
|
||||
|
||||
reasoning_content: str = ""
|
||||
for k, v in provider_specific_fields.items():
|
||||
if k == "thinking_blocks" and isinstance(v, list):
|
||||
_reasoning_content = ""
|
||||
for block in v:
|
||||
if block.get("type") == "thinking":
|
||||
_reasoning_content += block.get("thinking", "")
|
||||
reasoning_content = _reasoning_content
|
||||
elif k == "reasoning_content":
|
||||
reasoning_content = v
|
||||
return reasoning_content
|
||||
|
||||
|
||||
def add_provider_specific_fields(
|
||||
object: BaseModel, provider_specific_fields: Optional[Dict[str, Any]]
|
||||
|
@ -487,12 +465,6 @@ def add_provider_specific_fields(
|
|||
if not provider_specific_fields: # set if provider_specific_fields is not empty
|
||||
return
|
||||
setattr(object, "provider_specific_fields", provider_specific_fields)
|
||||
for k, v in provider_specific_fields.items():
|
||||
if v is not None:
|
||||
setattr(object, k, v)
|
||||
if k in REASONING_CONTENT_COMPATIBLE_PARAMS and k != "reasoning_content":
|
||||
reasoning_content = map_reasoning_content({k: v})
|
||||
setattr(object, "reasoning_content", reasoning_content)
|
||||
|
||||
|
||||
class Message(OpenAIObject):
|
||||
|
@ -501,6 +473,8 @@ class Message(OpenAIObject):
|
|||
tool_calls: Optional[List[ChatCompletionMessageToolCall]]
|
||||
function_call: Optional[FunctionCall]
|
||||
audio: Optional[ChatCompletionAudioResponse] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
||||
provider_specific_fields: Optional[Dict[str, Any]] = Field(
|
||||
default=None, exclude=True
|
||||
)
|
||||
|
@ -513,6 +487,8 @@ class Message(OpenAIObject):
|
|||
tool_calls: Optional[list] = None,
|
||||
audio: Optional[ChatCompletionAudioResponse] = None,
|
||||
provider_specific_fields: Optional[Dict[str, Any]] = None,
|
||||
reasoning_content: Optional[str] = None,
|
||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
|
||||
**params,
|
||||
):
|
||||
init_values: Dict[str, Any] = {
|
||||
|
@ -538,6 +514,12 @@ class Message(OpenAIObject):
|
|||
if audio is not None:
|
||||
init_values["audio"] = audio
|
||||
|
||||
if thinking_blocks is not None:
|
||||
init_values["thinking_blocks"] = thinking_blocks
|
||||
|
||||
if reasoning_content is not None:
|
||||
init_values["reasoning_content"] = reasoning_content
|
||||
|
||||
super(Message, self).__init__(
|
||||
**init_values, # type: ignore
|
||||
**params,
|
||||
|
@ -548,6 +530,14 @@ class Message(OpenAIObject):
|
|||
# OpenAI compatible APIs like mistral API will raise an error if audio is passed in
|
||||
del self.audio
|
||||
|
||||
if reasoning_content is None:
|
||||
# ensure default response matches OpenAI spec
|
||||
del self.reasoning_content
|
||||
|
||||
if thinking_blocks is None:
|
||||
# ensure default response matches OpenAI spec
|
||||
del self.thinking_blocks
|
||||
|
||||
add_provider_specific_fields(self, provider_specific_fields)
|
||||
|
||||
def get(self, key, default=None):
|
||||
|
@ -571,9 +561,9 @@ class Message(OpenAIObject):
|
|||
|
||||
|
||||
class Delta(OpenAIObject):
|
||||
provider_specific_fields: Optional[Dict[str, Any]] = Field(
|
||||
default=None, exclude=True
|
||||
)
|
||||
reasoning_content: Optional[str] = None
|
||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
||||
provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -582,6 +572,8 @@ class Delta(OpenAIObject):
|
|||
function_call=None,
|
||||
tool_calls=None,
|
||||
audio: Optional[ChatCompletionAudioResponse] = None,
|
||||
reasoning_content: Optional[str] = None,
|
||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None,
|
||||
**params,
|
||||
):
|
||||
super(Delta, self).__init__(**params)
|
||||
|
@ -593,6 +585,18 @@ class Delta(OpenAIObject):
|
|||
self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None
|
||||
self.audio: Optional[ChatCompletionAudioResponse] = None
|
||||
|
||||
if reasoning_content is not None:
|
||||
self.reasoning_content = reasoning_content
|
||||
else:
|
||||
# ensure default response matches OpenAI spec
|
||||
del self.reasoning_content
|
||||
|
||||
if thinking_blocks is not None:
|
||||
self.thinking_blocks = thinking_blocks
|
||||
else:
|
||||
# ensure default response matches OpenAI spec
|
||||
del self.thinking_blocks
|
||||
|
||||
if function_call is not None and isinstance(function_call, dict):
|
||||
self.function_call = FunctionCall(**function_call)
|
||||
else:
|
||||
|
@ -894,12 +898,14 @@ class ModelResponseBase(OpenAIObject):
|
|||
|
||||
class ModelResponseStream(ModelResponseBase):
|
||||
choices: List[StreamingChoices]
|
||||
provider_specific_fields: Optional[Dict[str, Any]] = Field(default=None)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
choices: Optional[List[Union[StreamingChoices, dict, BaseModel]]] = None,
|
||||
id: Optional[str] = None,
|
||||
created: Optional[int] = None,
|
||||
provider_specific_fields: Optional[Dict[str, Any]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
if choices is not None and isinstance(choices, list):
|
||||
|
@ -936,6 +942,7 @@ class ModelResponseStream(ModelResponseBase):
|
|||
kwargs["id"] = id
|
||||
kwargs["created"] = created
|
||||
kwargs["object"] = "chat.completion.chunk"
|
||||
kwargs["provider_specific_fields"] = provider_specific_fields
|
||||
|
||||
super().__init__(**kwargs)
|
||||
|
||||
|
|
|
@ -1970,25 +1970,31 @@ def test_get_applied_guardrails(test_case):
|
|||
# Assert
|
||||
assert sorted(result) == sorted(test_case["expected"])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"endpoint, params, expected_bool",
|
||||
[
|
||||
("localhost:4000/v1/rerank", ["max_chunks_per_doc"], True),
|
||||
("localhost:4000/v2/rerank", ["max_chunks_per_doc"], False),
|
||||
("localhost:4000", ["max_chunks_per_doc"], True),
|
||||
|
||||
("localhost:4000/v1/rerank", ["max_tokens_per_doc"], True),
|
||||
("localhost:4000/v2/rerank", ["max_tokens_per_doc"], False),
|
||||
("localhost:4000", ["max_tokens_per_doc"], False),
|
||||
|
||||
("localhost:4000/v1/rerank", ["max_chunks_per_doc", "max_tokens_per_doc"], True),
|
||||
("localhost:4000/v2/rerank", ["max_chunks_per_doc", "max_tokens_per_doc"], False),
|
||||
(
|
||||
"localhost:4000/v1/rerank",
|
||||
["max_chunks_per_doc", "max_tokens_per_doc"],
|
||||
True,
|
||||
),
|
||||
(
|
||||
"localhost:4000/v2/rerank",
|
||||
["max_chunks_per_doc", "max_tokens_per_doc"],
|
||||
False,
|
||||
),
|
||||
("localhost:4000", ["max_chunks_per_doc", "max_tokens_per_doc"], False),
|
||||
|
||||
],
|
||||
)
|
||||
def test_should_use_cohere_v1_client(endpoint, params, expected_bool):
|
||||
assert(litellm.utils.should_use_cohere_v1_client(endpoint, params) == expected_bool)
|
||||
assert litellm.utils.should_use_cohere_v1_client(endpoint, params) == expected_bool
|
||||
|
||||
|
||||
def test_add_openai_metadata():
|
||||
|
@ -2008,3 +2014,24 @@ def test_add_openai_metadata():
|
|||
assert result == {
|
||||
"user_api_key_end_user_id": "123",
|
||||
}
|
||||
|
||||
|
||||
def test_message_object():
|
||||
from litellm.types.utils import Message
|
||||
|
||||
message = Message(content="Hello, world!", role="user")
|
||||
assert message.content == "Hello, world!"
|
||||
assert message.role == "user"
|
||||
assert not hasattr(message, "audio")
|
||||
assert not hasattr(message, "thinking_blocks")
|
||||
assert not hasattr(message, "reasoning_content")
|
||||
|
||||
|
||||
def test_delta_object():
|
||||
from litellm.types.utils import Delta
|
||||
|
||||
delta = Delta(content="Hello, world!", role="user")
|
||||
assert delta.content == "Hello, world!"
|
||||
assert delta.role == "user"
|
||||
assert not hasattr(delta, "thinking_blocks")
|
||||
assert not hasattr(delta, "reasoning_content")
|
||||
|
|
|
@ -1163,21 +1163,25 @@ def test_anthropic_citations_api_streaming():
|
|||
assert has_citations
|
||||
|
||||
|
||||
def test_anthropic_thinking_output():
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
"anthropic/claude-3-7-sonnet-20250219",
|
||||
"bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
],
|
||||
)
|
||||
def test_anthropic_thinking_output(model):
|
||||
from litellm import completion
|
||||
|
||||
litellm._turn_on_debug()
|
||||
|
||||
resp = completion(
|
||||
model="anthropic/claude-3-7-sonnet-20250219",
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||
)
|
||||
|
||||
print(resp)
|
||||
assert (
|
||||
resp.choices[0].message.provider_specific_fields["thinking_blocks"] is not None
|
||||
)
|
||||
assert resp.choices[0].message.reasoning_content is not None
|
||||
assert isinstance(resp.choices[0].message.reasoning_content, str)
|
||||
assert resp.choices[0].message.thinking_blocks is not None
|
||||
|
@ -1185,12 +1189,19 @@ def test_anthropic_thinking_output():
|
|||
assert len(resp.choices[0].message.thinking_blocks) > 0
|
||||
|
||||
|
||||
def test_anthropic_thinking_output_stream():
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
"anthropic/claude-3-7-sonnet-20250219",
|
||||
"bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
],
|
||||
)
|
||||
def test_anthropic_thinking_output_stream(model):
|
||||
# litellm.set_verbose = True
|
||||
try:
|
||||
# litellm._turn_on_debug()
|
||||
litellm._turn_on_debug()
|
||||
resp = litellm.completion(
|
||||
model="anthropic/claude-3-7-sonnet-20250219",
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "Tell me a joke."}],
|
||||
stream=True,
|
||||
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||
|
|
|
@ -131,7 +131,7 @@ def test_completion_bedrock_guardrails(streaming):
|
|||
|
||||
print("TRACE=", response.trace)
|
||||
else:
|
||||
|
||||
litellm.set_verbose = True
|
||||
response = completion(
|
||||
model="anthropic.claude-v2",
|
||||
messages=[
|
||||
|
|
|
@ -108,10 +108,10 @@ def test_nova_invoke_streaming_chunk_parsing():
|
|||
}
|
||||
}
|
||||
result = decoder._chunk_parser(nova_text_chunk)
|
||||
assert result["text"] == "Hello, how can I help?"
|
||||
assert result["index"] == 0
|
||||
assert not result["is_finished"]
|
||||
assert result["tool_use"] is None
|
||||
assert result.choices[0].delta.content == "Hello, how can I help?"
|
||||
assert result.choices[0].index == 0
|
||||
assert not result.choices[0].finish_reason
|
||||
assert result.choices[0].delta.tool_calls is None
|
||||
|
||||
# Test case 2: Tool use start in contentBlockDelta
|
||||
nova_tool_start_chunk = {
|
||||
|
@ -121,12 +121,12 @@ def test_nova_invoke_streaming_chunk_parsing():
|
|||
}
|
||||
}
|
||||
result = decoder._chunk_parser(nova_tool_start_chunk)
|
||||
assert result["text"] == ""
|
||||
assert result["index"] == 1
|
||||
assert result["tool_use"] is not None
|
||||
assert result["tool_use"]["type"] == "function"
|
||||
assert result["tool_use"]["function"]["name"] == "get_weather"
|
||||
assert result["tool_use"]["id"] == "tool_1"
|
||||
assert result.choices[0].delta.content == ""
|
||||
assert result.choices[0].index == 1
|
||||
assert result.choices[0].delta.tool_calls is not None
|
||||
assert result.choices[0].delta.tool_calls[0].type == "function"
|
||||
assert result.choices[0].delta.tool_calls[0].function.name == "get_weather"
|
||||
assert result.choices[0].delta.tool_calls[0].id == "tool_1"
|
||||
|
||||
# Test case 3: Tool use arguments in contentBlockDelta
|
||||
nova_tool_args_chunk = {
|
||||
|
@ -136,10 +136,13 @@ def test_nova_invoke_streaming_chunk_parsing():
|
|||
}
|
||||
}
|
||||
result = decoder._chunk_parser(nova_tool_args_chunk)
|
||||
assert result["text"] == ""
|
||||
assert result["index"] == 2
|
||||
assert result["tool_use"] is not None
|
||||
assert result["tool_use"]["function"]["arguments"] == '{"location": "New York"}'
|
||||
assert result.choices[0].delta.content == ""
|
||||
assert result.choices[0].index == 2
|
||||
assert result.choices[0].delta.tool_calls is not None
|
||||
assert (
|
||||
result.choices[0].delta.tool_calls[0].function.arguments
|
||||
== '{"location": "New York"}'
|
||||
)
|
||||
|
||||
# Test case 4: Stop reason in contentBlockDelta
|
||||
nova_stop_chunk = {
|
||||
|
@ -149,5 +152,4 @@ def test_nova_invoke_streaming_chunk_parsing():
|
|||
}
|
||||
result = decoder._chunk_parser(nova_stop_chunk)
|
||||
print(result)
|
||||
assert result["is_finished"] is True
|
||||
assert result["finish_reason"] == "tool_calls"
|
||||
assert result.choices[0].finish_reason == "tool_calls"
|
||||
|
|
|
@ -280,6 +280,13 @@ class TestOpenAIChatCompletion(BaseLLMChatTest):
|
|||
"""Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
|
||||
pass
|
||||
|
||||
def test_prompt_caching(self):
|
||||
"""
|
||||
Test that prompt caching works correctly.
|
||||
Skip for now, as it's working locally but not in CI
|
||||
"""
|
||||
pass
|
||||
|
||||
def test_multilingual_requests(self):
|
||||
"""
|
||||
Tests that the provider can handle multilingual requests and invalid utf-8 sequences
|
||||
|
|
|
@ -161,6 +161,7 @@ def test_aaparallel_function_call(model):
|
|||
"model",
|
||||
[
|
||||
"anthropic/claude-3-7-sonnet-20250219",
|
||||
"bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||
],
|
||||
)
|
||||
@pytest.mark.flaky(retries=3, delay=1)
|
||||
|
|
|
@ -1621,7 +1621,7 @@ def test_completion_replicate_stream_bad_key():
|
|||
|
||||
def test_completion_bedrock_claude_stream():
|
||||
try:
|
||||
litellm.set_verbose = False
|
||||
litellm.set_verbose = True
|
||||
response = completion(
|
||||
model="bedrock/anthropic.claude-instant-v1",
|
||||
messages=[
|
||||
|
|
|
@ -130,14 +130,7 @@ async def test_create_llm_obs_payload():
|
|||
assert payload["meta"]["input"]["messages"] == [
|
||||
{"role": "user", "content": "Hello, world!"}
|
||||
]
|
||||
assert payload["meta"]["output"]["messages"] == [
|
||||
{
|
||||
"content": "Hi there!",
|
||||
"role": "assistant",
|
||||
"tool_calls": None,
|
||||
"function_call": None,
|
||||
}
|
||||
]
|
||||
assert payload["meta"]["output"]["messages"][0]["content"] == "Hi there!"
|
||||
assert payload["metrics"]["input_tokens"] == 20
|
||||
assert payload["metrics"]["output_tokens"] == 10
|
||||
assert payload["metrics"]["total_tokens"] == 30
|
||||
|
|
|
@ -359,12 +359,8 @@ def test_get_chat_content_for_langfuse():
|
|||
)
|
||||
|
||||
result = LangFuseLogger._get_chat_content_for_langfuse(mock_response)
|
||||
assert result == {
|
||||
"content": "Hello world",
|
||||
"role": "assistant",
|
||||
"tool_calls": None,
|
||||
"function_call": None,
|
||||
}
|
||||
assert result["content"] == "Hello world"
|
||||
assert result["role"] == "assistant"
|
||||
|
||||
# Test with empty choices
|
||||
mock_response = ModelResponse(choices=[])
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue