Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 11:14:04 +00:00
Litellm dev bedrock anthropic 3 7 v2 (#8843)
* feat(bedrock/converse/transformation.py): support claude-3-7-sonnet reasoning_content transformation (Closes https://github.com/BerriAI/litellm/issues/8777)
* fix(bedrock/): support returning `reasoning_content` on streaming for claude-3-7 (Resolves https://github.com/BerriAI/litellm/issues/8777)
* feat(bedrock/): unify converse reasoning content blocks for consistency across anthropic and bedrock
* fix(anthropic/chat/transformation.py): handle deepseek-style 'reasoning_content' extraction within transformation.py (simpler logic)
* feat(bedrock/): fix streaming to return blocks in consistent format
* fix: fix linting error
* test: fix test
* feat(factory.py): fix bedrock thinking block translation on tool calling (allows passing the thinking blocks back to bedrock for tool calling)
* fix(types/utils.py): don't exclude provider_specific_fields on model dump (ensures consistent responses)
* fix: fix linting errors
* fix(convert_dict_to_response.py): pass reasoning_content on root
* fix: test
* fix(streaming_handler.py): add helper util for setting model id
* fix(streaming_handler.py): fix setting model id on model response stream chunk
* fix(streaming_handler.py): fix linting error
* fix(streaming_handler.py): fix linting error
* fix(types/utils.py): add provider_specific_fields to model stream response
* fix(streaming_handler.py): copy provider specific fields and add them to the root of the streaming response
* fix(streaming_handler.py): fix check
* fix: fix test
* fix(types/utils.py): ensure messages content is always openai compatible
* fix(types/utils.py): fix delta object to always be openai compatible (only introduce new params if variable exists)
* test: fix bedrock nova tests
* test: skip flaky test
* test: skip flaky test in ci/cd
This commit is contained in:
parent f3ef6c92a3
commit 05a973bf19
20 changed files with 447 additions and 149 deletions
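For context, the net effect of these changes is that Bedrock claude-3-7 streaming chunks expose the model's reasoning through the same fields the Anthropic integration uses. Below is a minimal consumer-side sketch, not part of this PR: it assumes the `reasoning_content` delta field introduced in this diff, an example Bedrock claude-3-7 model id, and that the `thinking` parameter is forwarded to the provider.

import litellm

# Assumptions: the model id and `thinking` passthrough may differ in your setup;
# the delta fields read below are the ones this diff adds to streaming responses.
stream = litellm.completion(
    model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
    stream=True,
)

for chunk in stream:
    delta = chunk.choices[0].delta
    if getattr(delta, "reasoning_content", None):
        print("reasoning:", delta.reasoning_content)
    if delta.content:
        print("text:", delta.content)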
@@ -26,7 +26,6 @@ import httpx # type: ignore
import litellm
from litellm import verbose_logger
from litellm._logging import print_verbose
from litellm.caching.caching import InMemoryCache
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.litellm_logging import Logging
@@ -51,13 +50,19 @@ from litellm.llms.custom_httpx.http_handler import (
)
from litellm.types.llms.bedrock import *
from litellm.types.llms.openai import (
    ChatCompletionThinkingBlock,
    ChatCompletionToolCallChunk,
    ChatCompletionToolCallFunctionChunk,
    ChatCompletionUsageBlock,
)
from litellm.types.utils import ChatCompletionMessageToolCall, Choices
from litellm.types.utils import ChatCompletionMessageToolCall, Choices, Delta
from litellm.types.utils import GenericStreamingChunk as GChunk
from litellm.types.utils import ModelResponse, ModelResponseStream, Usage
from litellm.types.utils import (
    ModelResponse,
    ModelResponseStream,
    StreamingChoices,
    Usage,
)
from litellm.utils import CustomStreamWrapper, get_secret

from ..base_aws_llm import BaseAWSLLM
@@ -212,7 +217,6 @@ async def make_call(
    api_key="",
    data=data,
    messages=messages,
    print_verbose=print_verbose,
    encoding=litellm.encoding,
) # type: ignore
completion_stream: Any = MockResponseIterator(
@@ -298,7 +302,6 @@ def make_sync_call(
    api_key="",
    data=data,
    messages=messages,
    print_verbose=print_verbose,
    encoding=litellm.encoding,
) # type: ignore
completion_stream: Any = MockResponseIterator(
@@ -525,7 +528,7 @@ class BedrockLLM(BaseAWSLLM):
].message.tool_calls:
    _tool_call = {**tool_call.dict(), "index": 0}
    _tool_calls.append(_tool_call)
delta_obj = litellm.utils.Delta(
delta_obj = Delta(
    content=getattr(
        model_response.choices[0].message, "content", None
    ),
@@ -1258,14 +1261,37 @@ class AWSEventStreamDecoder:
            return True
        return False

    def converse_chunk_parser(self, chunk_data: dict) -> GChunk:
    def extract_reasoning_content_str(
        self, reasoning_content_block: BedrockConverseReasoningContentBlockDelta
    ) -> Optional[str]:
        if "text" in reasoning_content_block:
            return reasoning_content_block["text"]
        return None

    def translate_thinking_blocks(
        self, thinking_block: BedrockConverseReasoningContentBlockDelta
    ) -> Optional[List[ChatCompletionThinkingBlock]]:
        """
        Translate the thinking blocks to a string
        """

        thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
        _thinking_block = ChatCompletionThinkingBlock(type="thinking")
        if "text" in thinking_block:
            _thinking_block["thinking"] = thinking_block["text"]
        thinking_blocks_list.append(_thinking_block)
        return thinking_blocks_list

    def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
        try:
            verbose_logger.debug("\n\nRaw Chunk: {}\n\n".format(chunk_data))
            text = ""
            tool_use: Optional[ChatCompletionToolCallChunk] = None
            is_finished = False
            finish_reason = ""
            usage: Optional[ChatCompletionUsageBlock] = None
            provider_specific_fields: dict = {}
            reasoning_content: Optional[str] = None
            thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None

            index = int(chunk_data.get("contentBlockIndex", 0))
            if "start" in chunk_data:
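For illustration only (not the PR's code): the two new helpers above perform the mapping sketched below, with plain dicts standing in for litellm's `BedrockConverseReasoningContentBlockDelta` and `ChatCompletionThinkingBlock` TypedDicts.

from typing import List, Optional


def extract_reasoning_text(reasoning_delta: dict) -> Optional[str]:
    # Bedrock streams reasoning text under a "text" key; anything else yields None.
    return reasoning_delta.get("text")


def to_thinking_blocks(reasoning_delta: dict) -> List[dict]:
    # Wrap the same text in an Anthropic-style thinking block so callers see one
    # shape whether the request went to Anthropic's API or to Bedrock.
    block = {"type": "thinking"}
    if "text" in reasoning_delta:
        block["thinking"] = reasoning_delta["text"]
    return [block]


sample = {"text": "First, recall Rayleigh scattering..."}
print(extract_reasoning_text(sample))  # "First, recall Rayleigh scattering..."
print(to_thinking_blocks(sample))      # [{"type": "thinking", "thinking": "..."}]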
@@ -1305,6 +1331,16 @@ class AWSEventStreamDecoder:
                        },
                        "index": index,
                    }
                elif "reasoningContent" in delta_obj:
                    provider_specific_fields = {
                        "reasoningContent": delta_obj["reasoningContent"],
                    }
                    reasoning_content = self.extract_reasoning_content_str(
                        delta_obj["reasoningContent"]
                    )
                    thinking_blocks = self.translate_thinking_blocks(
                        delta_obj["reasoningContent"]
                    )
            elif (
                "contentBlockIndex" in chunk_data
            ): # stop block, no 'start' or 'delta' object
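For reference, a hedged sketch of the kind of converse-stream event this new branch handles: the dict shape below is an assumption inferred from the keys the parser reads (signature and redacted-content variants omitted). The raw block is preserved in `provider_specific_fields` while the text is also lifted into the OpenAI-style fields.

# Assumed shape of a Bedrock converse-stream reasoning delta; only the keys this
# branch reads are shown.
chunk_data = {
    "contentBlockIndex": 0,
    "delta": {"reasoningContent": {"text": "Shorter wavelengths scatter more..."}},
}

delta_obj = chunk_data["delta"]
reasoning_block = delta_obj["reasoningContent"]

provider_specific_fields = {"reasoningContent": reasoning_block}  # raw block, kept as-is
reasoning_content = reasoning_block.get("text")                   # deepseek-style string
thinking_blocks = [{"type": "thinking", "thinking": reasoning_content}]  # anthropic-style

print(reasoning_content)
print(thinking_blocks)
print(provider_specific_fields)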
@@ -1321,7 +1357,6 @@ class AWSEventStreamDecoder:
                    }
            elif "stopReason" in chunk_data:
                finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
                is_finished = True
            elif "usage" in chunk_data:
                usage = ChatCompletionUsageBlock(
                    prompt_tokens=chunk_data.get("inputTokens", 0),
@@ -1329,18 +1364,33 @@ class AWSEventStreamDecoder:
                    total_tokens=chunk_data.get("totalTokens", 0),
                )

            response = GChunk(
                text=text,
                tool_use=tool_use,
                is_finished=is_finished,
                finish_reason=finish_reason,
                usage=usage,
                index=index,
            )

            model_response_provider_specific_fields = {}
            if "trace" in chunk_data:
                trace = chunk_data.get("trace")
                response["provider_specific_fields"] = {"trace": trace}
                model_response_provider_specific_fields["trace"] = trace
            response = ModelResponseStream(
                choices=[
                    StreamingChoices(
                        finish_reason=finish_reason,
                        index=index,
                        delta=Delta(
                            content=text,
                            role="assistant",
                            tool_calls=[tool_use] if tool_use else None,
                            provider_specific_fields=(
                                provider_specific_fields
                                if provider_specific_fields
                                else None
                            ),
                            thinking_blocks=thinking_blocks,
                            reasoning_content=reasoning_content,
                        ),
                    )
                ],
                usage=usage,
                provider_specific_fields=model_response_provider_specific_fields,
            )

            return response
        except Exception as e:
            raise Exception("Received streaming error - {}".format(str(e)))
@@ -1486,7 +1536,7 @@ class AmazonAnthropicClaudeStreamDecoder(AWSEventStreamDecoder):
            sync_stream=sync_stream,
        )

    def _chunk_parser(self, chunk_data: dict) -> GChunk:
    def _chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
        return self.anthropic_model_response_iterator.chunk_parser(chunk=chunk_data)