diff --git a/litellm/llms/bedrock/chat/invoke_handler.py b/litellm/llms/bedrock/chat/invoke_handler.py
index 44e5b40380..9fa791e069 100644
--- a/litellm/llms/bedrock/chat/invoke_handler.py
+++ b/litellm/llms/bedrock/chat/invoke_handler.py
@@ -72,6 +72,9 @@ _response_stream_shape_cache = None
 bedrock_tool_name_mappings: InMemoryCache = InMemoryCache(
     max_size_in_memory=50, default_ttl=600
 )
+from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig
+
+converse_config = AmazonConverseConfig()
 
 
 class AmazonCohereChatConfig:
@@ -1274,7 +1277,7 @@ class AWSEventStreamDecoder:
         text = ""
         tool_use: Optional[ChatCompletionToolCallChunk] = None
         finish_reason = ""
-        usage: Optional[ChatCompletionUsageBlock] = None
+        usage: Optional[Usage] = None
         provider_specific_fields: dict = {}
         reasoning_content: Optional[str] = None
         thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
@@ -1350,12 +1353,7 @@ class AWSEventStreamDecoder:
         elif "stopReason" in chunk_data:
             finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
         elif "usage" in chunk_data:
-            usage = ChatCompletionUsageBlock(
-                prompt_tokens=chunk_data.get("inputTokens", 0),
-                completion_tokens=chunk_data.get("outputTokens", 0),
-                total_tokens=chunk_data.get("totalTokens", 0),
-            )
-
+            usage = converse_config._transform_usage(chunk_data.get("usage", {}))
         model_response_provider_specific_fields = {}
         if "trace" in chunk_data:
             trace = chunk_data.get("trace")
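
For reference, the new branch delegates to `AmazonConverseConfig._transform_usage` instead of building the usage block inline. A minimal sketch of the assumed mapping is below; the helper name `transform_usage_sketch` is hypothetical, and only the `inputTokens`/`outputTokens`/`totalTokens` mapping is evidenced by the removed code (the real method may handle additional fields such as cache tokens):

```python
# Sketch only: approximates what AmazonConverseConfig._transform_usage is
# assumed to do with the Converse "usage" block, based on the fields the
# removed inline code read. Not the actual litellm implementation.
from litellm.types.utils import Usage


def transform_usage_sketch(usage_block: dict) -> Usage:
    # Map Bedrock Converse token counts onto the OpenAI-style Usage object.
    return Usage(
        prompt_tokens=usage_block.get("inputTokens", 0),
        completion_tokens=usage_block.get("outputTokens", 0),
        total_tokens=usage_block.get("totalTokens", 0),
    )


# Mirrors the diff's call site:
# usage = transform_usage_sketch(chunk_data.get("usage", {}))
```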