From ec4f665e299bda268af24f58c0545bdb472d4aad Mon Sep 17 00:00:00 2001
From: Krish Dholakia
Date: Wed, 5 Mar 2025 19:33:54 -0800
Subject: [PATCH] Return `signature` on anthropic streaming + migrate to `signature` field instead of `signature_delta` [MINOR bump] (#9021)

* Fix missing signature_delta in thinking blocks when streaming from Claude 3.7 (#8797)

Co-authored-by: Krish Dholakia

* test: update test to enforce signature found

* feat(refactor-signature-param-to-be-'signature'-instead-of-'signature_delta'): keeps it in sync with anthropic

* fix: fix linting error

---------

Co-authored-by: Martin Krasser
---
 .../litellm_core_utils/prompt_templates/factory.py   |  2 +-
 litellm/llms/anthropic/chat/handler.py               |  7 ++++---
 litellm/llms/bedrock/chat/converse_transformation.py |  4 ++--
 litellm/types/llms/openai.py                         |  2 +-
 tests/llm_translation/test_anthropic_completion.py   | 12 +++++++++---
 tests/local_testing/test_streaming.py                |  2 ++
 6 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py
index d8ecd14d38..93c5013fc6 100644
--- a/litellm/litellm_core_utils/prompt_templates/factory.py
+++ b/litellm/litellm_core_utils/prompt_templates/factory.py
@@ -2983,7 +2983,7 @@ class BedrockConverseMessagesProcessor:
         reasoning_content_blocks: List[BedrockContentBlock] = []
         for thinking_block in thinking_blocks:
             reasoning_text = thinking_block.get("thinking")
-            reasoning_signature = thinking_block.get("signature_delta")
+            reasoning_signature = thinking_block.get("signature")
             text_block = BedrockConverseReasoningTextBlock(
                 text=reasoning_text or "",
             )
diff --git a/litellm/llms/anthropic/chat/handler.py b/litellm/llms/anthropic/chat/handler.py
index 46c8edae03..04edb40754 100644
--- a/litellm/llms/anthropic/chat/handler.py
+++ b/litellm/llms/anthropic/chat/handler.py
@@ -527,6 +527,7 @@ class ModelResponseIterator:
             provider_specific_fields = {}
             content_block = ContentBlockDelta(**chunk)  # type: ignore
             thinking_blocks: List[ChatCompletionThinkingBlock] = []
 
+            self.content_blocks.append(content_block)
             if "text" in content_block["delta"]:
                 text = content_block["delta"]["text"]
@@ -544,13 +545,13 @@ class ModelResponseIterator:
                 provider_specific_fields["citation"] = content_block["delta"]["citation"]
             elif (
                 "thinking" in content_block["delta"]
-                or "signature_delta" == content_block["delta"]
+                or "signature" in content_block["delta"]
             ):
                 thinking_blocks = [
                     ChatCompletionThinkingBlock(
                         type="thinking",
-                        thinking=content_block["delta"].get("thinking"),
-                        signature_delta=content_block["delta"].get("signature"),
+                        thinking=content_block["delta"].get("thinking") or "",
+                        signature=content_block["delta"].get("signature"),
                     )
                 ]
                 provider_specific_fields["thinking_blocks"] = thinking_blocks
diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py
index 18ec425e44..0b0d55f23d 100644
--- a/litellm/llms/bedrock/chat/converse_transformation.py
+++ b/litellm/llms/bedrock/chat/converse_transformation.py
@@ -272,7 +272,7 @@ class AmazonConverseConfig(BaseConfig):
                 optional_params["temperature"] = value
             if param == "top_p":
                 optional_params["topP"] = value
-            if param == "tools":
+            if param == "tools" and isinstance(value, list):
                 optional_params = self._add_tools_to_optional_params(
                     optional_params=optional_params, tools=value
                 )
@@ -598,7 +598,7 @@ class AmazonConverseConfig(BaseConfig):
                 if _text is not None:
                     _thinking_block["thinking"] = _text
                 if _signature is not None:
-                    _thinking_block["signature_delta"] = _signature
+                    _thinking_block["signature"] = _signature
                 thinking_blocks_list.append(_thinking_block)
 
         return thinking_blocks_list
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index 77e729daf2..6dce7f3356 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -360,7 +360,7 @@ class ChatCompletionCachedContent(TypedDict):
 class ChatCompletionThinkingBlock(TypedDict, total=False):
     type: Required[Literal["thinking"]]
     thinking: str
-    signature_delta: str
+    signature: str
     cache_control: Optional[Union[dict, ChatCompletionCachedContent]]
 
 
diff --git a/tests/llm_translation/test_anthropic_completion.py b/tests/llm_translation/test_anthropic_completion.py
index df318a8036..20b550e3c2 100644
--- a/tests/llm_translation/test_anthropic_completion.py
+++ b/tests/llm_translation/test_anthropic_completion.py
@@ -1188,18 +1188,20 @@ def test_anthropic_thinking_output(model):
     assert isinstance(resp.choices[0].message.thinking_blocks, list)
     assert len(resp.choices[0].message.thinking_blocks) > 0
 
+    assert resp.choices[0].message.thinking_blocks[0]["signature"] is not None
+
 
 @pytest.mark.parametrize(
     "model",
     [
         "anthropic/claude-3-7-sonnet-20250219",
-        "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
+        # "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
     ],
 )
 def test_anthropic_thinking_output_stream(model):
     # litellm.set_verbose = True
     try:
-        litellm._turn_on_debug()
+        # litellm._turn_on_debug()
         resp = litellm.completion(
             model=model,
             messages=[{"role": "user", "content": "Tell me a joke."}],
@@ -1209,6 +1211,7 @@ def test_anthropic_thinking_output_stream(model):
         )
 
         reasoning_content_exists = False
+        signature_block_exists = False
         for chunk in resp:
             print(f"chunk 2: {chunk}")
             if (
@@ -1220,8 +1223,11 @@ def test_anthropic_thinking_output_stream(model):
                 and isinstance(chunk.choices[0].delta.reasoning_content, str)
             ):
                 reasoning_content_exists = True
-                break
+                print(chunk.choices[0].delta.thinking_blocks[0])
+                if chunk.choices[0].delta.thinking_blocks[0].get("signature"):
+                    signature_block_exists = True
 
         assert reasoning_content_exists
+        assert signature_block_exists
     except litellm.Timeout:
         pytest.skip("Model is timing out")
diff --git a/tests/local_testing/test_streaming.py b/tests/local_testing/test_streaming.py
index f438fdc5c0..668ad12a48 100644
--- a/tests/local_testing/test_streaming.py
+++ b/tests/local_testing/test_streaming.py
@@ -4084,6 +4084,7 @@ def test_reasoning_content_completion(model):
         )
 
         reasoning_content_exists = False
+        signature_delta_exists = False
        for chunk in resp:
             print(f"chunk 2: {chunk}")
             if (
@@ -4118,3 +4119,4 @@ def test_is_delta_empty():
             audio=None,
         )
     )
+
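
Usage note (not part of the patch): after this change, thinking blocks returned by litellm carry a `signature` key instead of `signature_delta`, on both complete responses and streamed deltas. The sketch below shows how a caller might collect that field from a streamed response, mirroring the assertions in the updated test; the hard-coded model name and the `reasoning_effort` argument are assumptions, and valid Anthropic credentials are required for it to run.

import litellm

# Minimal sketch, assuming a litellm version that includes this patch, a configured
# ANTHROPIC_API_KEY, and that `reasoning_effort` enables thinking for Claude 3.7.
resp = litellm.completion(
    model="anthropic/claude-3-7-sonnet-20250219",
    messages=[{"role": "user", "content": "Tell me a joke."}],
    reasoning_effort="low",
    stream=True,
)

signature_seen = False
for chunk in resp:
    delta = chunk.choices[0].delta
    thinking_blocks = getattr(delta, "thinking_blocks", None)
    if thinking_blocks:
        # Post-patch, each streamed thinking block exposes `signature`
        # (previously `signature_delta`).
        if thinking_blocks[0].get("signature"):
            signature_seen = True

assert signature_seen, "expected at least one chunk to carry a thinking-block signature"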