forked from phoenix/litellm-mirror
Litellm dev 10 14 2024 (#6221)
* fix(__init__.py): expose DualCache, RedisCache, InMemoryCache on root, to keep abstract internal file refactors from impacting users * feat(utils.py): handle invalid openai parallel tool calling response Fixes https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653 * docs(bedrock.md): clarify all bedrock models are supported Closes https://github.com/BerriAI/litellm/issues/6168#issuecomment-2412082236
This commit is contained in:
parent
cda0a993e2
commit
39486e2003
5 changed files with 240 additions and 5 deletions
|
@ -2,7 +2,7 @@ import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# AWS Bedrock
|
# AWS Bedrock
|
||||||
Anthropic, Amazon Titan, A121 LLMs are Supported on Bedrock
|
ALL Bedrock models (Anthropic, Meta, Mistral, Amazon, etc.) are Supported
|
||||||
|
|
||||||
LiteLLM requires `boto3` to be installed on your system for Bedrock requests
|
LiteLLM requires `boto3` to be installed on your system for Bedrock requests
|
||||||
```shell
|
```shell
|
||||||
|
|
|
@ -7,7 +7,7 @@ import threading
|
||||||
import os
|
import os
|
||||||
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
|
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
|
||||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||||
from litellm.caching.caching import Cache
|
from litellm.caching.caching import Cache, DualCache, RedisCache, InMemoryCache
|
||||||
from litellm._logging import (
|
from litellm._logging import (
|
||||||
set_verbose,
|
set_verbose,
|
||||||
_turn_on_debug,
|
_turn_on_debug,
|
||||||
|
|
|
@ -7,9 +7,8 @@ from fastapi import HTTPException
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import ModelResponse
|
from litellm import DualCache, ModelResponse
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
from litellm.caching.caching import DualCache
|
|
||||||
from litellm.integrations.custom_logger import CustomLogger
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
|
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
|
||||||
from litellm.proxy._types import CurrentItemRateLimit, UserAPIKeyAuth
|
from litellm.proxy._types import CurrentItemRateLimit, UserAPIKeyAuth
|
||||||
|
|
|
@ -79,6 +79,7 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||||
from litellm.secret_managers.main import get_secret
|
from litellm.secret_managers.main import get_secret
|
||||||
from litellm.types.llms.openai import (
|
from litellm.types.llms.openai import (
|
||||||
AllMessageValues,
|
AllMessageValues,
|
||||||
|
ChatCompletionAssistantToolCall,
|
||||||
ChatCompletionNamedToolChoiceParam,
|
ChatCompletionNamedToolChoiceParam,
|
||||||
ChatCompletionToolParam,
|
ChatCompletionToolParam,
|
||||||
ChatCompletionToolParamFunctionChunk,
|
ChatCompletionToolParamFunctionChunk,
|
||||||
|
@ -89,11 +90,13 @@ from litellm.types.utils import (
|
||||||
OPENAI_RESPONSE_HEADERS,
|
OPENAI_RESPONSE_HEADERS,
|
||||||
CallTypes,
|
CallTypes,
|
||||||
ChatCompletionDeltaToolCall,
|
ChatCompletionDeltaToolCall,
|
||||||
|
ChatCompletionMessageToolCall,
|
||||||
Choices,
|
Choices,
|
||||||
CostPerToken,
|
CostPerToken,
|
||||||
Delta,
|
Delta,
|
||||||
Embedding,
|
Embedding,
|
||||||
EmbeddingResponse,
|
EmbeddingResponse,
|
||||||
|
Function,
|
||||||
ImageResponse,
|
ImageResponse,
|
||||||
Message,
|
Message,
|
||||||
ModelInfo,
|
ModelInfo,
|
||||||
|
@ -5612,6 +5615,54 @@ def convert_to_streaming_response(response_object: Optional[dict] = None):
|
||||||
yield model_response_object
|
yield model_response_object
|
||||||
|
|
||||||
|
|
||||||
|
from collections import defaultdict


def _handle_invalid_parallel_tool_calls(
    tool_calls: List[ChatCompletionMessageToolCall],
):
    """
    Handle hallucinated parallel tool call from openai - https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653

    Rewrites a single hallucinated ``multi_tool_use.parallel`` tool call into
    the real tool calls encoded in its ``tool_uses`` arguments, mutating
    ``tool_calls`` in place and returning it. Returns ``None`` if
    ``tool_calls`` is ``None``; tool calls that are not the hallucinated
    wrapper pass through untouched.

    Code modified from: https://github.com/phdowling/openai_multi_tool_use_parallel_patch/blob/main/openai_multi_tool_use_parallel_patch.py
    """

    if tool_calls is None:
        return

    replacements: Dict[int, List[ChatCompletionMessageToolCall]] = defaultdict(list)
    for i, tool_call in enumerate(tool_calls):
        current_function = tool_call.function.name
        if current_function == "multi_tool_use.parallel":
            verbose_logger.debug(
                "OpenAI did a weird pseudo-multi-tool-use call, fixing call structure.."
            )
            # Only parse arguments once we know this is the hallucinated
            # wrapper: parsing unconditionally would raise JSONDecodeError for
            # legitimate tool calls whose arguments are not (yet) valid JSON.
            try:
                function_args = json.loads(tool_call.function.arguments)
            except json.JSONDecodeError:
                # Arguments of the hallucinated call are unparseable; we
                # cannot repair it, so leave the call as-is.
                continue
            for _fake_i, _fake_tool_use in enumerate(function_args["tool_uses"]):
                _function_args = _fake_tool_use["parameters"]
                _current_function = _fake_tool_use["recipient_name"]
                if _current_function.startswith("functions."):
                    _current_function = _current_function[len("functions.") :]

                fixed_tc = ChatCompletionMessageToolCall(
                    id=f"{tool_call.id}_{_fake_i}",
                    type="function",
                    function=Function(
                        name=_current_function, arguments=json.dumps(_function_args)
                    ),
                )
                replacements[i].append(fixed_tc)

    shift = 0
    for i, replacement in replacements.items():
        tool_calls[:] = (
            tool_calls[: i + shift] + replacement + tool_calls[i + shift + 1 :]
        )
        # Replacing 1 element with len(replacement) elements grows the list by
        # len(replacement) - 1, so later indices shift by that amount (the
        # previous `shift += len(replacement)` was off by one when more than
        # one hallucinated call appeared).
        shift += len(replacement) - 1

    return tool_calls
|
||||||
|
|
||||||
|
|
||||||
def convert_to_model_response_object(
|
def convert_to_model_response_object(
|
||||||
response_object: Optional[dict] = None,
|
response_object: Optional[dict] = None,
|
||||||
model_response_object: Optional[
|
model_response_object: Optional[
|
||||||
|
@ -5707,6 +5758,18 @@ def convert_to_model_response_object(
|
||||||
for idx, choice in enumerate(response_object["choices"]):
|
for idx, choice in enumerate(response_object["choices"]):
|
||||||
## HANDLE JSON MODE - anthropic returns single function call]
|
## HANDLE JSON MODE - anthropic returns single function call]
|
||||||
tool_calls = choice["message"].get("tool_calls", None)
|
tool_calls = choice["message"].get("tool_calls", None)
|
||||||
|
if tool_calls is not None:
|
||||||
|
_openai_tool_calls = []
|
||||||
|
for _tc in tool_calls:
|
||||||
|
_openai_tc = ChatCompletionMessageToolCall(**_tc)
|
||||||
|
_openai_tool_calls.append(_openai_tc)
|
||||||
|
fixed_tool_calls = _handle_invalid_parallel_tool_calls(
|
||||||
|
_openai_tool_calls
|
||||||
|
)
|
||||||
|
|
||||||
|
if fixed_tool_calls is not None:
|
||||||
|
tool_calls = fixed_tool_calls
|
||||||
|
|
||||||
message: Optional[Message] = None
|
message: Optional[Message] = None
|
||||||
finish_reason: Optional[str] = None
|
finish_reason: Optional[str] = None
|
||||||
if (
|
if (
|
||||||
|
@ -5726,7 +5789,7 @@ def convert_to_model_response_object(
|
||||||
content=choice["message"].get("content", None),
|
content=choice["message"].get("content", None),
|
||||||
role=choice["message"]["role"] or "assistant",
|
role=choice["message"]["role"] or "assistant",
|
||||||
function_call=choice["message"].get("function_call", None),
|
function_call=choice["message"].get("function_call", None),
|
||||||
tool_calls=choice["message"].get("tool_calls", None),
|
tool_calls=tool_calls,
|
||||||
)
|
)
|
||||||
finish_reason = choice.get("finish_reason", None)
|
finish_reason = choice.get("finish_reason", None)
|
||||||
if finish_reason is None:
|
if finish_reason is None:
|
||||||
|
|
|
@ -4567,3 +4567,176 @@ def test_completion_response_ratelimit_headers(model, stream):
|
||||||
assert v != "None" and v is not None
|
assert v != "None" and v is not None
|
||||||
assert "x-ratelimit-remaining-requests" in additional_headers
|
assert "x-ratelimit-remaining-requests" in additional_headers
|
||||||
assert "x-ratelimit-remaining-tokens" in additional_headers
|
assert "x-ratelimit-remaining-tokens" in additional_headers
|
||||||
|
|
||||||
|
|
||||||
|
def _openai_hallucinated_tool_call_mock_response(
    *args, **kwargs
) -> litellm.ModelResponse:
    """Return a mocked raw OpenAI client response whose single tool call is the
    hallucinated ``multi_tool_use.parallel`` wrapper, for exercising litellm's
    invalid-parallel-tool-call repair path."""
    mocked_raw_response = MagicMock()
    mocked_raw_response.headers = {"hello": "world"}

    payload = {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "created": 1677652288,
        "model": "gpt-3.5-turbo-0125",
        "system_fingerprint": "fp_44709d6fcb",
        "choices": [
            {
                "index": 0,
                "message": {
                    "content": None,
                    "role": "assistant",
                    "tool_calls": [
                        {
                            "function": {
                                "arguments": '{"tool_uses":[{"recipient_name":"product_title","parameters":{"content":"Story Scribe"}},{"recipient_name":"one_liner","parameters":{"content":"Transform interview transcripts into actionable user stories"}}]}',
                                "name": "multi_tool_use.parallel",
                            },
                            "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s",
                            "type": "function",
                        }
                    ],
                },
                "logprobs": None,
                "finish_reason": "stop",
            }
        ],
        "usage": {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21},
    }

    from openai import OpenAI
    from openai.types.chat.chat_completion import ChatCompletion

    parsed = ChatCompletion(**payload)  # type: ignore
    # Deliberately blank the role to mimic a malformed provider response.
    parsed.choices[0].message.role = None  # type: ignore
    mocked_raw_response.parse.return_value = parsed
    return mocked_raw_response
|
||||||
|
|
||||||
|
|
||||||
|
def test_openai_hallucinated_tool_call():
    """
    Patch for this issue: https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653

    OpenAI sometimes emits a single hallucinated ``multi_tool_use.parallel``
    tool call whose ``arguments`` embed the real calls:

    - "arguments": "{\"tool_uses\":[{\"recipient_name\":\"product_title\",\"parameters\":{\"content\":\"Story Scribe\"}},{\"recipient_name\":\"one_liner\",\"parameters\":{\"content\":\"Transform interview transcripts into actionable user stories\"}}]}",

    litellm should unpack it: parse the arguments JSON, then for each entry in
    ``tool_uses`` take the function name from ``recipient_name`` and the
    function arguments from ``parameters``. This test mocks the raw client
    response and checks the completion surfaces the two repaired tool calls.
    """
    import openai

    client = openai.OpenAI()
    expected_tool_calls = [
        {
            "function": {
                "arguments": '{"content": "Story Scribe"}',
                "name": "product_title",
            },
            "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s_0",
            "type": "function",
        },
        {
            "function": {
                "arguments": '{"content": "Transform interview transcripts into actionable user stories"}',
                "name": "one_liner",
            },
            "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s_1",
            "type": "function",
        },
    ]

    with patch.object(
        client.chat.completions,
        "create",
        side_effect=_openai_hallucinated_tool_call_mock_response,
    ) as mock_response:
        response = litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey! how's it going?"}],
            client=client,
        )
        print(f"response: {response}")

        tool_calls = response.model_dump()["choices"][0]["message"]["tool_calls"]
        print(f"tool_calls: {tool_calls}")

        for idx, tc in enumerate(tool_calls):
            if idx == 0:
                print(f"tc in test_openai_hallucinated_tool_call: {tc}")
                assert tc == expected_tool_calls[0]
            elif idx == 1:
                assert tc == expected_tool_calls[1]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "function_name, expect_modification",
    [
        ("multi_tool_use.parallel", True),
        ("my-fake-function", False),
    ],
)
def test_openai_hallucinated_tool_call_util(function_name, expect_modification):
    """
    Patch for this issue: https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653

    Unit-tests ``_handle_invalid_parallel_tool_calls`` directly. A hallucinated
    ``multi_tool_use.parallel`` call such as:

    - "arguments": "{\"tool_uses\":[{\"recipient_name\":\"product_title\",\"parameters\":{\"content\":\"Story Scribe\"}},{\"recipient_name\":\"one_liner\",\"parameters\":{\"content\":\"Transform interview transcripts into actionable user stories\"}}]}",

    must be expanded into one tool call per ``tool_uses`` entry (name from
    ``recipient_name``, arguments from ``parameters``), while a tool call with
    any other function name must pass through unmodified.
    """
    from litellm.types.utils import ChatCompletionMessageToolCall
    from litellm.utils import _handle_invalid_parallel_tool_calls

    input_tool_call = ChatCompletionMessageToolCall(
        **{
            "function": {
                "arguments": '{"tool_uses":[{"recipient_name":"product_title","parameters":{"content":"Story Scribe"}},{"recipient_name":"one_liner","parameters":{"content":"Transform interview transcripts into actionable user stories"}}]}',
                "name": function_name,
            },
            "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s",
            "type": "function",
        }
    )

    response = _handle_invalid_parallel_tool_calls(tool_calls=[input_tool_call])

    print(f"response: {response}")

    if expect_modification:
        expected_dumps = [
            {
                "function": {
                    "arguments": '{"content": "Story Scribe"}',
                    "name": "product_title",
                },
                "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s_0",
                "type": "function",
            },
            {
                "function": {
                    "arguments": '{"content": "Transform interview transcripts into actionable user stories"}',
                    "name": "one_liner",
                },
                "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s_1",
                "type": "function",
            },
        ]
        for idx, tc in enumerate(response):
            if idx == 0:
                assert tc.model_dump() == expected_dumps[0]
            elif idx == 1:
                assert tc.model_dump() == expected_dumps[1]
    else:
        assert len(response) == 1
        assert response[0].function.name == function_name
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue