Litellm dev 10 14 2024 (#6221)

* fix(__init__.py): expose DualCache, RedisCache, InMemoryCache on root abstract internal file refactors from impacting users * feat(utils.py): handle invalid openai parallel tool calling response Fixes https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653 * docs(bedrock.md): clarify all bedrock models are supported Closes https://github.com/BerriAI/litellm/issues/6168#issuecomment-2412082236
2025-04-25 10:44:24 +00:00 · 2024-10-14 22:11:14 -07:00 · 2024-10-14 22:11:14 -07:00 · 39486e2003
commit 39486e2003
parent cda0a993e2
5 changed files with 240 additions and 5 deletions
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -79,6 +79,7 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.secret_managers.main import get_secret
 from litellm.types.llms.openai import (
    AllMessageValues,
+    ChatCompletionAssistantToolCall,
    ChatCompletionNamedToolChoiceParam,
    ChatCompletionToolParam,
    ChatCompletionToolParamFunctionChunk,
@ -89,11 +90,13 @@ from litellm.types.utils import (
    OPENAI_RESPONSE_HEADERS,
    CallTypes,
    ChatCompletionDeltaToolCall,
+    ChatCompletionMessageToolCall,
    Choices,
    CostPerToken,
    Delta,
    Embedding,
    EmbeddingResponse,
+    Function,
    ImageResponse,
    Message,
    ModelInfo,
@ -5612,6 +5615,54 @@ def convert_to_streaming_response(response_object: Optional[dict] = None):
    yield model_response_object


+from collections import defaultdict
+
+
+def _handle_invalid_parallel_tool_calls(
+    tool_calls: List[ChatCompletionMessageToolCall],
+):
+    """
+    Handle hallucinated parallel tool call from openai - https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653
+
+    Code modified from: https://github.com/phdowling/openai_multi_tool_use_parallel_patch/blob/main/openai_multi_tool_use_parallel_patch.py
+    """
+
+    if tool_calls is None:
+        return
+
+    replacements: Dict[int, List[ChatCompletionMessageToolCall]] = defaultdict(list)
+    for i, tool_call in enumerate(tool_calls):
+        current_function = tool_call.function.name
+        function_args = json.loads(tool_call.function.arguments)
+        if current_function == "multi_tool_use.parallel":
+            verbose_logger.debug(
+                "OpenAI did a weird pseudo-multi-tool-use call, fixing call structure.."
+            )
+            for _fake_i, _fake_tool_use in enumerate(function_args["tool_uses"]):
+                _function_args = _fake_tool_use["parameters"]
+                _current_function = _fake_tool_use["recipient_name"]
+                if _current_function.startswith("functions."):
+                    _current_function = _current_function[len("functions.") :]
+
+                fixed_tc = ChatCompletionMessageToolCall(
+                    id=f"{tool_call.id}_{_fake_i}",
+                    type="function",
+                    function=Function(
+                        name=_current_function, arguments=json.dumps(_function_args)
+                    ),
+                )
+                replacements[i].append(fixed_tc)
+
+    shift = 0
+    for i, replacement in replacements.items():
+        tool_calls[:] = (
+            tool_calls[: i + shift] + replacement + tool_calls[i + shift + 1 :]
+        )
+        shift += len(replacement)
+
+    return tool_calls
+
+
 def convert_to_model_response_object(
    response_object: Optional[dict] = None,
    model_response_object: Optional[
@ -5707,6 +5758,18 @@ def convert_to_model_response_object(
            for idx, choice in enumerate(response_object["choices"]):
                ## HANDLE JSON MODE - anthropic returns single function call]
                tool_calls = choice["message"].get("tool_calls", None)
+                if tool_calls is not None:
+                    _openai_tool_calls = []
+                    for _tc in tool_calls:
+                        _openai_tc = ChatCompletionMessageToolCall(**_tc)
+                        _openai_tool_calls.append(_openai_tc)
+                    fixed_tool_calls = _handle_invalid_parallel_tool_calls(
+                        _openai_tool_calls
+                    )
+
+                    if fixed_tool_calls is not None:
+                        tool_calls = fixed_tool_calls
+
                message: Optional[Message] = None
                finish_reason: Optional[str] = None
                if (
@ -5726,7 +5789,7 @@ def convert_to_model_response_object(
                        content=choice["message"].get("content", None),
                        role=choice["message"]["role"] or "assistant",
                        function_call=choice["message"].get("function_call", None),
-                        tool_calls=choice["message"].get("tool_calls", None),
+                        tool_calls=tool_calls,
                    )
                    finish_reason = choice.get("finish_reason", None)
                if finish_reason is None: