forked from phoenix/litellm-mirror
Litellm dev 10 14 2024 (#6221)
* fix(__init__.py): expose DualCache, RedisCache, InMemoryCache on root, to keep abstract internal file refactors from impacting users * feat(utils.py): handle invalid openai parallel tool calling response Fixes https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653 * docs(bedrock.md): clarify all bedrock models are supported Closes https://github.com/BerriAI/litellm/issues/6168#issuecomment-2412082236
This commit is contained in:
parent
cda0a993e2
commit
39486e2003
5 changed files with 240 additions and 5 deletions
|
@ -2,7 +2,7 @@ import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# AWS Bedrock
|
# AWS Bedrock
|
||||||
Anthropic, Amazon Titan, A121 LLMs are Supported on Bedrock
|
ALL Bedrock models (Anthropic, Meta, Mistral, Amazon, etc.) are Supported
|
||||||
|
|
||||||
LiteLLM requires `boto3` to be installed on your system for Bedrock requests
|
LiteLLM requires `boto3` to be installed on your system for Bedrock requests
|
||||||
```shell
|
```shell
|
||||||
|
|
|
@ -7,7 +7,7 @@ import threading
|
||||||
import os
|
import os
|
||||||
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
|
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
|
||||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||||
from litellm.caching.caching import Cache
|
from litellm.caching.caching import Cache, DualCache, RedisCache, InMemoryCache
|
||||||
from litellm._logging import (
|
from litellm._logging import (
|
||||||
set_verbose,
|
set_verbose,
|
||||||
_turn_on_debug,
|
_turn_on_debug,
|
||||||
|
|
|
@ -7,9 +7,8 @@ from fastapi import HTTPException
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import ModelResponse
|
from litellm import DualCache, ModelResponse
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
from litellm.caching.caching import DualCache
|
|
||||||
from litellm.integrations.custom_logger import CustomLogger
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
|
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
|
||||||
from litellm.proxy._types import CurrentItemRateLimit, UserAPIKeyAuth
|
from litellm.proxy._types import CurrentItemRateLimit, UserAPIKeyAuth
|
||||||
|
|
|
@ -79,6 +79,7 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
||||||
from litellm.secret_managers.main import get_secret
|
from litellm.secret_managers.main import get_secret
|
||||||
from litellm.types.llms.openai import (
|
from litellm.types.llms.openai import (
|
||||||
AllMessageValues,
|
AllMessageValues,
|
||||||
|
ChatCompletionAssistantToolCall,
|
||||||
ChatCompletionNamedToolChoiceParam,
|
ChatCompletionNamedToolChoiceParam,
|
||||||
ChatCompletionToolParam,
|
ChatCompletionToolParam,
|
||||||
ChatCompletionToolParamFunctionChunk,
|
ChatCompletionToolParamFunctionChunk,
|
||||||
|
@ -89,11 +90,13 @@ from litellm.types.utils import (
|
||||||
OPENAI_RESPONSE_HEADERS,
|
OPENAI_RESPONSE_HEADERS,
|
||||||
CallTypes,
|
CallTypes,
|
||||||
ChatCompletionDeltaToolCall,
|
ChatCompletionDeltaToolCall,
|
||||||
|
ChatCompletionMessageToolCall,
|
||||||
Choices,
|
Choices,
|
||||||
CostPerToken,
|
CostPerToken,
|
||||||
Delta,
|
Delta,
|
||||||
Embedding,
|
Embedding,
|
||||||
EmbeddingResponse,
|
EmbeddingResponse,
|
||||||
|
Function,
|
||||||
ImageResponse,
|
ImageResponse,
|
||||||
Message,
|
Message,
|
||||||
ModelInfo,
|
ModelInfo,
|
||||||
|
@ -5612,6 +5615,54 @@ def convert_to_streaming_response(response_object: Optional[dict] = None):
|
||||||
yield model_response_object
|
yield model_response_object
|
||||||
|
|
||||||
|
|
||||||
|
from collections import defaultdict


def _handle_invalid_parallel_tool_calls(
    tool_calls: List[ChatCompletionMessageToolCall],
):
    """
    Handle hallucinated parallel tool call from openai - https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653

    Rewrites a single hallucinated ``multi_tool_use.parallel`` tool call into
    the real tool calls encoded in its ``tool_uses`` arguments, mutating
    ``tool_calls`` in place and returning it. Returns ``None`` if
    ``tool_calls`` is ``None``; tool calls that are not the hallucinated
    wrapper pass through untouched.

    Code modified from: https://github.com/phdowling/openai_multi_tool_use_parallel_patch/blob/main/openai_multi_tool_use_parallel_patch.py
    """

    if tool_calls is None:
        return

    replacements: Dict[int, List[ChatCompletionMessageToolCall]] = defaultdict(list)
    for i, tool_call in enumerate(tool_calls):
        current_function = tool_call.function.name
        if current_function == "multi_tool_use.parallel":
            verbose_logger.debug(
                "OpenAI did a weird pseudo-multi-tool-use call, fixing call structure.."
            )
            # Only parse arguments once we know this is the hallucinated
            # wrapper: parsing unconditionally would raise JSONDecodeError for
            # legitimate tool calls whose arguments are not (yet) valid JSON.
            try:
                function_args = json.loads(tool_call.function.arguments)
            except json.JSONDecodeError:
                # Arguments of the hallucinated call are unparseable; we
                # cannot repair it, so leave the call as-is.
                continue
            for _fake_i, _fake_tool_use in enumerate(function_args["tool_uses"]):
                _function_args = _fake_tool_use["parameters"]
                _current_function = _fake_tool_use["recipient_name"]
                if _current_function.startswith("functions."):
                    _current_function = _current_function[len("functions.") :]

                fixed_tc = ChatCompletionMessageToolCall(
                    id=f"{tool_call.id}_{_fake_i}",
                    type="function",
                    function=Function(
                        name=_current_function, arguments=json.dumps(_function_args)
                    ),
                )
                replacements[i].append(fixed_tc)

    shift = 0
    for i, replacement in replacements.items():
        tool_calls[:] = (
            tool_calls[: i + shift] + replacement + tool_calls[i + shift + 1 :]
        )
        # Replacing 1 element with len(replacement) elements grows the list by
        # len(replacement) - 1, so later indices shift by that amount (the
        # previous `shift += len(replacement)` was off by one when more than
        # one hallucinated call appeared).
        shift += len(replacement) - 1

    return tool_calls
|
||||||
|
|
||||||
|
|
||||||
def convert_to_model_response_object(
|
def convert_to_model_response_object(
|
||||||
response_object: Optional[dict] = None,
|
response_object: Optional[dict] = None,
|
||||||
model_response_object: Optional[
|
model_response_object: Optional[
|
||||||
|
@ -5707,6 +5758,18 @@ def convert_to_model_response_object(
|
||||||
for idx, choice in enumerate(response_object["choices"]):
|
for idx, choice in enumerate(response_object["choices"]):
|
||||||
## HANDLE JSON MODE - anthropic returns single function call]
|
## HANDLE JSON MODE - anthropic returns single function call]
|
||||||
tool_calls = choice["message"].get("tool_calls", None)
|
tool_calls = choice["message"].get("tool_calls", None)
|
||||||
|
if tool_calls is not None:
|
||||||
|
_openai_tool_calls = []
|
||||||
|
for _tc in tool_calls:
|
||||||
|
_openai_tc = ChatCompletionMessageToolCall(**_tc)
|
||||||
|
_openai_tool_calls.append(_openai_tc)
|
||||||
|
fixed_tool_calls = _handle_invalid_parallel_tool_calls(
|
||||||
|
_openai_tool_calls
|
||||||
|
)
|
||||||
|
|
||||||
|
if fixed_tool_calls is not None:
|
||||||
|
tool_calls = fixed_tool_calls
|
||||||
|
|
||||||
message: Optional[Message] = None
|
message: Optional[Message] = None
|
||||||
finish_reason: Optional[str] = None
|
finish_reason: Optional[str] = None
|
||||||
if (
|
if (
|
||||||
|
@ -5726,7 +5789,7 @@ def convert_to_model_response_object(
|
||||||
content=choice["message"].get("content", None),
|
content=choice["message"].get("content", None),
|
||||||
role=choice["message"]["role"] or "assistant",
|
role=choice["message"]["role"] or "assistant",
|
||||||
function_call=choice["message"].get("function_call", None),
|
function_call=choice["message"].get("function_call", None),
|
||||||
tool_calls=choice["message"].get("tool_calls", None),
|
tool_calls=tool_calls,
|
||||||
)
|
)
|
||||||
finish_reason = choice.get("finish_reason", None)
|
finish_reason = choice.get("finish_reason", None)
|
||||||
if finish_reason is None:
|
if finish_reason is None:
|
||||||
|
|
|
@ -4567,3 +4567,176 @@ def test_completion_response_ratelimit_headers(model, stream):
|
||||||
assert v != "None" and v is not None
|
assert v != "None" and v is not None
|
||||||
assert "x-ratelimit-remaining-requests" in additional_headers
|
assert "x-ratelimit-remaining-requests" in additional_headers
|
||||||
assert "x-ratelimit-remaining-tokens" in additional_headers
|
assert "x-ratelimit-remaining-tokens" in additional_headers
|
||||||
|
|
||||||
|
|
||||||
|
def _openai_hallucinated_tool_call_mock_response(
    *args, **kwargs
) -> litellm.ModelResponse:
    """Return a mocked raw OpenAI client response whose single tool call is the
    hallucinated ``multi_tool_use.parallel`` wrapper, for exercising litellm's
    invalid-parallel-tool-call repair path."""
    mocked_raw_response = MagicMock()
    mocked_raw_response.headers = {"hello": "world"}

    payload = {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "created": 1677652288,
        "model": "gpt-3.5-turbo-0125",
        "system_fingerprint": "fp_44709d6fcb",
        "choices": [
            {
                "index": 0,
                "message": {
                    "content": None,
                    "role": "assistant",
                    "tool_calls": [
                        {
                            "function": {
                                "arguments": '{"tool_uses":[{"recipient_name":"product_title","parameters":{"content":"Story Scribe"}},{"recipient_name":"one_liner","parameters":{"content":"Transform interview transcripts into actionable user stories"}}]}',
                                "name": "multi_tool_use.parallel",
                            },
                            "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s",
                            "type": "function",
                        }
                    ],
                },
                "logprobs": None,
                "finish_reason": "stop",
            }
        ],
        "usage": {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21},
    }

    from openai import OpenAI
    from openai.types.chat.chat_completion import ChatCompletion

    parsed = ChatCompletion(**payload)  # type: ignore
    # Deliberately blank the role to mimic a malformed provider response.
    parsed.choices[0].message.role = None  # type: ignore
    mocked_raw_response.parse.return_value = parsed
    return mocked_raw_response
|
||||||
|
|
||||||
|
|
||||||
|
def test_openai_hallucinated_tool_call():
    """
    Patch for this issue: https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653

    OpenAI sometimes emits a single hallucinated ``multi_tool_use.parallel``
    tool call whose ``arguments`` embed the real calls:

    - "arguments": "{\"tool_uses\":[{\"recipient_name\":\"product_title\",\"parameters\":{\"content\":\"Story Scribe\"}},{\"recipient_name\":\"one_liner\",\"parameters\":{\"content\":\"Transform interview transcripts into actionable user stories\"}}]}",

    litellm should unpack it: parse the arguments JSON, then for each entry in
    ``tool_uses`` take the function name from ``recipient_name`` and the
    function arguments from ``parameters``. This test mocks the raw client
    response and checks the completion surfaces the two repaired tool calls.
    """
    import openai

    client = openai.OpenAI()
    expected_tool_calls = [
        {
            "function": {
                "arguments": '{"content": "Story Scribe"}',
                "name": "product_title",
            },
            "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s_0",
            "type": "function",
        },
        {
            "function": {
                "arguments": '{"content": "Transform interview transcripts into actionable user stories"}',
                "name": "one_liner",
            },
            "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s_1",
            "type": "function",
        },
    ]

    with patch.object(
        client.chat.completions,
        "create",
        side_effect=_openai_hallucinated_tool_call_mock_response,
    ) as mock_response:
        response = litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey! how's it going?"}],
            client=client,
        )
        print(f"response: {response}")

        tool_calls = response.model_dump()["choices"][0]["message"]["tool_calls"]
        print(f"tool_calls: {tool_calls}")

        for idx, tc in enumerate(tool_calls):
            if idx == 0:
                print(f"tc in test_openai_hallucinated_tool_call: {tc}")
                assert tc == expected_tool_calls[0]
            elif idx == 1:
                assert tc == expected_tool_calls[1]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "function_name, expect_modification",
    [
        ("multi_tool_use.parallel", True),
        ("my-fake-function", False),
    ],
)
def test_openai_hallucinated_tool_call_util(function_name, expect_modification):
    """
    Patch for this issue: https://community.openai.com/t/model-tries-to-call-unknown-function-multi-tool-use-parallel/490653

    Unit-tests ``_handle_invalid_parallel_tool_calls`` directly. A hallucinated
    ``multi_tool_use.parallel`` call such as:

    - "arguments": "{\"tool_uses\":[{\"recipient_name\":\"product_title\",\"parameters\":{\"content\":\"Story Scribe\"}},{\"recipient_name\":\"one_liner\",\"parameters\":{\"content\":\"Transform interview transcripts into actionable user stories\"}}]}",

    must be expanded into one tool call per ``tool_uses`` entry (name from
    ``recipient_name``, arguments from ``parameters``), while a tool call with
    any other function name must pass through unmodified.
    """
    from litellm.types.utils import ChatCompletionMessageToolCall
    from litellm.utils import _handle_invalid_parallel_tool_calls

    input_tool_call = ChatCompletionMessageToolCall(
        **{
            "function": {
                "arguments": '{"tool_uses":[{"recipient_name":"product_title","parameters":{"content":"Story Scribe"}},{"recipient_name":"one_liner","parameters":{"content":"Transform interview transcripts into actionable user stories"}}]}',
                "name": function_name,
            },
            "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s",
            "type": "function",
        }
    )

    response = _handle_invalid_parallel_tool_calls(tool_calls=[input_tool_call])

    print(f"response: {response}")

    if expect_modification:
        expected_dumps = [
            {
                "function": {
                    "arguments": '{"content": "Story Scribe"}',
                    "name": "product_title",
                },
                "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s_0",
                "type": "function",
            },
            {
                "function": {
                    "arguments": '{"content": "Transform interview transcripts into actionable user stories"}',
                    "name": "one_liner",
                },
                "id": "call_IzGXwVa5OfBd9XcCJOkt2q0s_1",
                "type": "function",
            },
        ]
        for idx, tc in enumerate(response):
            if idx == 0:
                assert tc.model_dump() == expected_dumps[0]
            elif idx == 1:
                assert tc.model_dump() == expected_dumps[1]
    else:
        assert len(response) == 1
        assert response[0].function.name == function_name
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue