From 39486e2003b49db092c436cc664bc8d11d47c993 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Mon, 14 Oct 2024 22:11:14 -0700 Subject: [PATCH] Litellm dev 10 14 2024 (#6221) * fix(__init__.py): expose DualCache, RedisCache, InMemoryCache on root abstract internal file refactors from impacting users * feat(utils.py): handle invalid openai parallel tool calling response Fixes https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653 * docs(bedrock.md): clarify all bedrock models are supported Closes https://github.com/BerriAI/litellm/issues/6168#issuecomment-2412082236 --- docs/my-website/docs/providers/bedrock.md | 2 +- litellm/__init__.py | 2 +- .../proxy/hooks/parallel_request_limiter.py | 3 +- litellm/utils.py | 65 ++++++- tests/local_testing/test_completion.py | 173 ++++++++++++++++++ 5 files changed, 240 insertions(+), 5 deletions(-) diff --git a/docs/my-website/docs/providers/bedrock.md b/docs/my-website/docs/providers/bedrock.md index 6548714b2..279098d12 100644 --- a/docs/my-website/docs/providers/bedrock.md +++ b/docs/my-website/docs/providers/bedrock.md @@ -2,7 +2,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; # AWS Bedrock -Anthropic, Amazon Titan, A121 LLMs are Supported on Bedrock +ALL Bedrock models (Anthropic, Meta, Mistral, Amazon, etc.) are Supported LiteLLM requires `boto3` to be installed on your system for Bedrock requests ```shell diff --git a/litellm/__init__.py b/litellm/__init__.py index f6713646d..5326d1cc6 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -7,7 +7,7 @@ import threading import os from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler -from litellm.caching.caching import Cache +from litellm.caching.caching import Cache, DualCache, RedisCache, InMemoryCache from litellm._logging import ( set_verbose, _turn_on_debug, diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py index 36e5fecff..f34a9bbac 100644 --- a/litellm/proxy/hooks/parallel_request_limiter.py +++ b/litellm/proxy/hooks/parallel_request_limiter.py @@ -7,9 +7,8 @@ from fastapi import HTTPException from pydantic import BaseModel import litellm -from litellm import ModelResponse +from litellm import DualCache, ModelResponse from litellm._logging import verbose_proxy_logger -from litellm.caching.caching import DualCache from litellm.integrations.custom_logger import CustomLogger from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs from litellm.proxy._types import CurrentItemRateLimit, UserAPIKeyAuth diff --git a/litellm/utils.py b/litellm/utils.py index 2457bdf4c..085fe7116 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -79,6 +79,7 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.secret_managers.main import get_secret from litellm.types.llms.openai import ( AllMessageValues, + ChatCompletionAssistantToolCall, ChatCompletionNamedToolChoiceParam, ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk, @@ -89,11 +90,13 @@ from litellm.types.utils import ( OPENAI_RESPONSE_HEADERS, CallTypes, ChatCompletionDeltaToolCall, + ChatCompletionMessageToolCall, Choices, CostPerToken, Delta, Embedding, EmbeddingResponse, + Function, ImageResponse, Message, ModelInfo, @@ -5612,6 +5615,54 @@ def convert_to_streaming_response(response_object: Optional[dict] = None): yield 
model_response_object +from collections import defaultdict + + +def _handle_invalid_parallel_tool_calls( + tool_calls: List[ChatCompletionMessageToolCall], +): + """ + Handle hallucinated parallel tool call from openai - https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653 + + Code modified from: https://github.com/phdowling/openai_multi_tool_use_parallel_patch/blob/main/openai_multi_tool_use_parallel_patch.py + """ + + if tool_calls is None: + return + + replacements: Dict[int, List[ChatCompletionMessageToolCall]] = defaultdict(list) + for i, tool_call in enumerate(tool_calls): + current_function = tool_call.function.name + function_args = json.loads(tool_call.function.arguments) + if current_function == "multi_tool_use.parallel": + verbose_logger.debug( + "OpenAI did a weird pseudo-multi-tool-use call, fixing call structure.." + ) + for _fake_i, _fake_tool_use in enumerate(function_args["tool_uses"]): + _function_args = _fake_tool_use["parameters"] + _current_function = _fake_tool_use["recipient_name"] + if _current_function.startswith("functions."): + _current_function = _current_function[len("functions.") :] + + fixed_tc = ChatCompletionMessageToolCall( + id=f"{tool_call.id}_{_fake_i}", + type="function", + function=Function( + name=_current_function, arguments=json.dumps(_function_args) + ), + ) + replacements[i].append(fixed_tc) + + shift = 0 + for i, replacement in replacements.items(): + tool_calls[:] = ( + tool_calls[: i + shift] + replacement + tool_calls[i + shift + 1 :] + ) + shift += len(replacement) + + return tool_calls + + def convert_to_model_response_object( response_object: Optional[dict] = None, model_response_object: Optional[ @@ -5707,6 +5758,18 @@ def convert_to_model_response_object( for idx, choice in enumerate(response_object["choices"]): ## HANDLE JSON MODE - anthropic returns single function call] tool_calls = choice["message"].get("tool_calls", None) + if tool_calls is not None: + _openai_tool_calls = [] + for _tc in tool_calls: + _openai_tc = ChatCompletionMessageToolCall(**_tc) + _openai_tool_calls.append(_openai_tc) + fixed_tool_calls = _handle_invalid_parallel_tool_calls( + _openai_tool_calls + ) + + if fixed_tool_calls is not None: + tool_calls = fixed_tool_calls + message: Optional[Message] = None finish_reason: Optional[str] = None if ( @@ -5726,7 +5789,7 @@ def convert_to_model_response_object( content=choice["message"].get("content", None), role=choice["message"]["role"] or "assistant", function_call=choice["message"].get("function_call", None), - tool_calls=choice["message"].get("tool_calls", None), + tool_calls=tool_calls, ) finish_reason = choice.get("finish_reason", None) if finish_reason is None: diff --git a/tests/local_testing/test_completion.py b/tests/local_testing/test_completion.py index ecc3c8034..33c0b67f1 100644 --- a/tests/local_testing/test_completion.py +++ b/tests/local_testing/test_completion.py @@ -4567,3 +4567,176 @@ def test_completion_response_ratelimit_headers(model, stream): assert v != "None" and v is not None assert "x-ratelimit-remaining-requests" in additional_headers assert "x-ratelimit-remaining-tokens" in additional_headers + + +def _openai_hallucinated_tool_call_mock_response( + *args, **kwargs +) -> litellm.ModelResponse: + new_response = MagicMock() + new_response.headers = {"hello": "world"} + + response_object = { + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1677652288, + "model": "gpt-3.5-turbo-0125", + "system_fingerprint": "fp_44709d6fcb", + 
"choices": [ + { + "index": 0, + "message": { + "content": None, + "role": "assistant", + "tool_calls": [ + { + "function": { + "arguments": '{"tool_uses":[{"recipient_name":"product_title","parameters":{"content":"Story Scribe"}},{"recipient_name":"one_liner","parameters":{"content":"Transform interview transcripts into actionable user stories"}}]}', + "name": "multi_tool_use.parallel", + }, + "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s", + "type": "function", + } + ], + }, + "logprobs": None, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21}, + } + from openai import OpenAI + from openai.types.chat.chat_completion import ChatCompletion + + pydantic_obj = ChatCompletion(**response_object) # type: ignore + pydantic_obj.choices[0].message.role = None # type: ignore + new_response.parse.return_value = pydantic_obj + return new_response + + +def test_openai_hallucinated_tool_call(): + """ + Patch for this issue: https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653 + + Handle openai invalid tool calling response. + + OpenAI assistant will sometimes return an invalid tool calling response, which needs to be parsed + + - "arguments": "{\"tool_uses\":[{\"recipient_name\":\"product_title\",\"parameters\":{\"content\":\"Story Scribe\"}},{\"recipient_name\":\"one_liner\",\"parameters\":{\"content\":\"Transform interview transcripts into actionable user stories\"}}]}", + + To extract actual tool calls: + + 1. Parse arguments JSON object + 2. Iterate over tool_uses array to call functions: + - get function name from recipient_name value + - parameters will be JSON object for function arguments + """ + import openai + + openai_client = openai.OpenAI() + with patch.object( + openai_client.chat.completions, + "create", + side_effect=_openai_hallucinated_tool_call_mock_response, + ) as mock_response: + response = litellm.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hey! how's it going?"}], + client=openai_client, + ) + print(f"response: {response}") + + response_dict = response.model_dump() + + tool_calls = response_dict["choices"][0]["message"]["tool_calls"] + + print(f"tool_calls: {tool_calls}") + + for idx, tc in enumerate(tool_calls): + if idx == 0: + print(f"tc in test_openai_hallucinated_tool_call: {tc}") + assert tc == { + "function": { + "arguments": '{"content": "Story Scribe"}', + "name": "product_title", + }, + "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s_0", + "type": "function", + } + elif idx == 1: + assert tc == { + "function": { + "arguments": '{"content": "Transform interview transcripts into actionable user stories"}', + "name": "one_liner", + }, + "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s_1", + "type": "function", + } + + +@pytest.mark.parametrize( + "function_name, expect_modification", + [ + ("multi_tool_use.parallel", True), + ("my-fake-function", False), + ], +) +def test_openai_hallucinated_tool_call_util(function_name, expect_modification): + """ + Patch for this issue: https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653 + + Handle openai invalid tool calling response. 
+ + OpenAI assistant will sometimes return an invalid tool calling response, which needs to be parsed + + - "arguments": "{\"tool_uses\":[{\"recipient_name\":\"product_title\",\"parameters\":{\"content\":\"Story Scribe\"}},{\"recipient_name\":\"one_liner\",\"parameters\":{\"content\":\"Transform interview transcripts into actionable user stories\"}}]}", + + To extract actual tool calls: + + 1. Parse arguments JSON object + 2. Iterate over tool_uses array to call functions: + - get function name from recipient_name value + - parameters will be JSON object for function arguments + """ + from litellm.utils import _handle_invalid_parallel_tool_calls + from litellm.types.utils import ChatCompletionMessageToolCall + + response = _handle_invalid_parallel_tool_calls( + tool_calls=[ + ChatCompletionMessageToolCall( + **{ + "function": { + "arguments": '{"tool_uses":[{"recipient_name":"product_title","parameters":{"content":"Story Scribe"}},{"recipient_name":"one_liner","parameters":{"content":"Transform interview transcripts into actionable user stories"}}]}', + "name": function_name, + }, + "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s", + "type": "function", + } + ) + ] + ) + + print(f"response: {response}") + + if expect_modification: + for idx, tc in enumerate(response): + if idx == 0: + assert tc.model_dump() == { + "function": { + "arguments": '{"content": "Story Scribe"}', + "name": "product_title", + }, + "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s_0", + "type": "function", + } + elif idx == 1: + assert tc.model_dump() == { + "function": { + "arguments": '{"content": "Transform interview transcripts into actionable user stories"}', + "name": "one_liner", + }, + "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s_1", + "type": "function", + } + else: + assert len(response) == 1 + assert response[0].function.name == function_name
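
The heart of this patch is the `multi_tool_use.parallel` repair added to `litellm/utils.py`. For readers who want the same behavior outside litellm, below is a minimal standalone sketch of that repair. The helper name `unpack_parallel_tool_call` and the plain-dict tool-call shape are illustrative assumptions only; litellm's actual `_handle_invalid_parallel_tool_calls` operates on `ChatCompletionMessageToolCall` objects and splices the fixed calls back into the original list in place.

```python
import json
from typing import Any, Dict, List


def unpack_parallel_tool_call(tool_call: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Split a hallucinated `multi_tool_use.parallel` call into real tool calls.

    `tool_call` is a plain dict in the OpenAI chat-completion shape:
    {"id": ..., "type": "function", "function": {"name": ..., "arguments": "<JSON string>"}}.
    Returns a single-element list with the original call if no repair is needed.
    """
    if tool_call.get("function", {}).get("name") != "multi_tool_use.parallel":
        return [tool_call]

    args = json.loads(tool_call["function"]["arguments"])
    fixed: List[Dict[str, Any]] = []
    for i, tool_use in enumerate(args.get("tool_uses", [])):
        name = tool_use["recipient_name"]
        # The model usually prefixes real function names with "functions."
        if name.startswith("functions."):
            name = name[len("functions."):]
        fixed.append(
            {
                # Suffix the original id so each synthetic call stays unique but traceable
                "id": f"{tool_call['id']}_{i}",
                "type": "function",
                "function": {
                    "name": name,
                    "arguments": json.dumps(tool_use["parameters"]),
                },
            }
        )
    return fixed


# Usage with the payload from the tests above: yields two separate tool calls,
# product_title and one_liner, with ids call_..._0 and call_..._1.
hallucinated = {
    "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s",
    "type": "function",
    "function": {
        "name": "multi_tool_use.parallel",
        "arguments": '{"tool_uses":[{"recipient_name":"product_title","parameters":{"content":"Story Scribe"}},{"recipient_name":"one_liner","parameters":{"content":"Transform interview transcripts into actionable user stories"}}]}',
    },
}

for tc in unpack_parallel_tool_call(hallucinated):
    print(tc["id"], tc["function"]["name"], tc["function"]["arguments"])
```

After this patch, callers do not need to perform this repair themselves: `convert_to_model_response_object` applies it automatically, so a `litellm.completion` response that would have contained the single hallucinated call instead exposes the individual tool calls, as exercised in `test_openai_hallucinated_tool_call` and `test_openai_hallucinated_tool_call_util` above.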