fix(anthropic.py): handle scenario where anthropic returns invalid json string for tool call while streaming

Fixes https://github.com/BerriAI/litellm/issues/5063
2025-04-25 18:54:30 +00:00 · 2024-08-07 09:24:11 -07:00 · 2024-08-07 09:24:11 -07:00 · 4919cc4d25
commit 4919cc4d25
parent 1008f24b16
5 changed files with 105 additions and 7 deletions
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@ -2,6 +2,7 @@ import copy
 import json
 import os
 import time
 import traceback
 import types
 from enum import Enum
 from functools import partial
@ -36,6 +37,7 @@ from litellm.types.llms.anthropic import (
    AnthropicResponseUsageBlock,
    ContentBlockDelta,
    ContentBlockStart,
    ContentBlockStop,
    ContentJsonBlockDelta,
    ContentTextBlockDelta,
    MessageBlockDelta,
@ -920,7 +922,12 @@ class AnthropicChatCompletion(BaseLLM):
                    model=model, messages=messages, custom_llm_provider="anthropic"
                )
            except Exception as e:
-                raise AnthropicError(status_code=400, message=str(e))
+                raise AnthropicError(
                    status_code=400,
                    message="{}\n{}\nReceived Messages={}".format(
                        str(e), traceback.format_exc(), messages
                    ),
                )
        ## Load Config
        config = litellm.AnthropicConfig.get_config()
@ -1079,10 +1086,30 @@ class ModelResponseIterator:
    def __init__(self, streaming_response, sync_stream: bool):
        self.streaming_response = streaming_response
        self.response_iterator = self.streaming_response
        self.content_blocks: List[ContentBlockDelta] = []
    def check_empty_tool_call_args(self) -> bool:
        """
        Report whether the current tool call block has streamed no arguments.

        Returns:
            True when the recorded deltas belong to a non-text block and the
            concatenation of their `partial_json` pieces is an empty string.
            False when no deltas were recorded, when the first recorded delta
            is a text delta, or when any argument text was received.
        """
        recorded = self.content_blocks
        # Nothing recorded, or a text block -> not an empty tool call.
        if len(recorded) == 0 or recorded[0]["delta"]["type"] == "text_delta":
            return False

        # Stitch together every JSON fragment streamed for this block.
        joined_args = "".join(
            entry["delta"].get("partial_json", "")  # type: ignore
            for entry in recorded
            if entry["delta"]["type"] == "input_json_delta"
        )
        return not joined_args
    def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
        try:
            verbose_logger.debug(f"\n\nRaw chunk:\n{chunk}\n")
            type_chunk = chunk.get("type", "") or ""
            text = ""
@ -1098,6 +1125,7 @@ class ModelResponseIterator:
                chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
                """
                content_block = ContentBlockDelta(**chunk)  # type: ignore
                self.content_blocks.append(content_block)
                if "text" in content_block["delta"]:
                    text = content_block["delta"]["text"]
                elif "partial_json" in content_block["delta"]:
@ -1116,6 +1144,7 @@ class ModelResponseIterator:
                data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}}
                """
                content_block_start = ContentBlockStart(**chunk)  # type: ignore
                self.content_blocks = []  # reset content blocks when new block starts
                if content_block_start["content_block"]["type"] == "text":
                    text = content_block_start["content_block"]["text"]
                elif content_block_start["content_block"]["type"] == "tool_use":
@ -1128,6 +1157,20 @@ class ModelResponseIterator:
                        },
                        "index": content_block_start["index"],
                    }
            elif type_chunk == "content_block_stop":
                content_block_stop = ContentBlockStop(**chunk)  # type: ignore
                # check if tool call content block
                is_empty = self.check_empty_tool_call_args()
                if is_empty:
                    tool_use = {
                        "id": None,
                        "type": "function",
                        "function": {
                            "name": None,
                            "arguments": "{}",
                        },
                        "index": content_block_stop["index"],
                    }
            elif type_chunk == "message_delta":
                """
                Anthropic
--- a/litellm/main.py
+++ b/litellm/main.py
@ -5113,7 +5113,9 @@ def stream_chunk_builder(
                prev_index = curr_index
                prev_id = curr_id
-        combined_arguments = "".join(argument_list)
+        combined_arguments = (
            "".join(argument_list) or "{}"
        )  # base case, return empty dict
        tool_calls_list.append(
            {
                "id": id,
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@ -4346,3 +4346,51 @@ def test_moderation():
 # test_moderation()
@pytest.mark.parametrize("model", ["gpt-3.5-turbo", "claude-3-5-sonnet-20240620"])
def test_streaming_tool_calls_valid_json_str(model):
    """
    Stream a completion that offers a zero-argument tool and verify that the
    argument string accumulated for every tool call id is parseable JSON.
    """
    # A tool whose parameter schema has no properties.
    snooze_tool = {
        "type": "function",
        "function": {
            "name": "snooze",
            "parameters": {
                "type": "object",
                "properties": {},
                "required": [],
            },
        },
    }
    messages = [
        {"role": "user", "content": "Hit the snooze button."},
    ]

    stream = litellm.completion(model, messages, tools=[snooze_tool], stream=True)
    chunks = list(stream)
    print(chunks)

    args_by_call_id = {}
    active_call_id = None
    pending_args = ""
    for chunk in chunks:
        delta_calls = chunk.choices[0].delta.tool_calls
        if delta_calls is None:
            continue
        first_call = delta_calls[0]

        if first_call.id is not None:
            # A new id starts a new tool call: store what the previous one collected.
            if active_call_id is not None:
                args_by_call_id[active_call_id] = pending_args
                pending_args = ""
            active_call_id = first_call.id
            args_by_call_id[active_call_id] = ""

        fragment = first_call.function.arguments
        if fragment is not None:
            pending_args += fragment

    # Store the arguments of the last tool call seen.
    if active_call_id is not None:
        args_by_call_id[active_call_id] = pending_args

    for call_id, arg_str in args_by_call_id.items():
        print("k={}, v={}".format(call_id, arg_str))
        json.loads(arg_str)  # raises if the accumulated string is not valid JSON
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@ -2596,8 +2596,8 @@ def streaming_and_function_calling_format_tests(idx, chunk):
@pytest.mark.parametrize(
    "model",
    [
-        "gpt-3.5-turbo",
+        # "gpt-3.5-turbo",
-        "anthropic.claude-3-sonnet-20240229-v1:0",
+        # "anthropic.claude-3-sonnet-20240229-v1:0",
        "claude-3-haiku-20240307",
    ],
 )
@ -2627,7 +2627,7 @@ def test_streaming_and_function_calling(model):
    messages = [{"role": "user", "content": "What is the weather like in Boston?"}]
    try:
-        litellm.set_verbose = True
+        # litellm.set_verbose = True
        response: litellm.CustomStreamWrapper = completion(
            model=model,
            tools=tools,
@ -2639,7 +2639,7 @@ def test_streaming_and_function_calling(model):
        json_str = ""
        for idx, chunk in enumerate(response):
            # continue
-            print("\n{}\n".format(chunk))
+            # print("\n{}\n".format(chunk))
            if idx == 0:
                assert (
                    chunk.choices[0].delta.tool_calls[0].function.arguments is not None
--- a/litellm/types/llms/anthropic.py
+++ b/litellm/types/llms/anthropic.py
@ -141,6 +141,11 @@ class ContentBlockDelta(TypedDict):
    delta: Union[ContentTextBlockDelta, ContentJsonBlockDelta]
 class ContentBlockStop(TypedDict):
    """
    Typed shape of a streamed chunk whose `type` is "content_block_stop".
    """
    # Fixed discriminator value identifying this chunk kind.
    type: Literal["content_block_stop"]
    # NOTE(review): presumably the position of the content block that just
    # ended within the message - confirm against the provider's streaming docs.
    index: int
 class ToolUseBlock(TypedDict):
    """
    "content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}